├── universe ├── envs │ ├── tests │ │ ├── __init__.py │ │ └── dusk-drive.png │ ├── vnc_core_env │ │ ├── __init__.py │ │ ├── translator.py │ │ └── vnc_core_env.py │ ├── vnc_internet.py │ ├── vnc_flashgames.py │ ├── vnc_gtav.py │ ├── vnc_wog.py │ ├── __init__.py │ ├── vnc_starcraft.py │ └── dummy_vnc_env.py ├── remotes │ ├── compose │ │ ├── __init__.py │ │ ├── signals.py │ │ ├── colors.py │ │ ├── utils.py │ │ └── progress_stream.py │ ├── __init__.py │ ├── remote.py │ ├── build.py │ ├── hardcoded_addresses.py │ └── healthcheck.py ├── vncdriver │ ├── vendor │ │ └── __init__.py │ ├── error.py │ ├── screen │ │ ├── base.py │ │ ├── __init__.py │ │ ├── screen_buffer.py │ │ └── pyglet_screen.py │ ├── README.md │ ├── __init__.py │ ├── auth.py │ ├── fbs_writer.py │ ├── fbs_reader.py │ ├── dual_proxy_server.py │ ├── vnc_session.py │ ├── libvnc_session.py │ └── constants.py ├── runtimes │ ├── .agignore │ ├── __init__.py │ └── registration.py ├── kube │ ├── __init__.py │ └── discovery.py ├── spaces │ ├── diagnostics.py │ ├── vnc_observation_space.py │ ├── __init__.py │ ├── hardcoded.py │ ├── joystick_event.py │ ├── joystick_action_space.py │ ├── vnc_event.py │ └── vnc_action_space.py ├── runtimes.yml ├── wrappers │ ├── experimental │ │ ├── __init__.py │ │ ├── random_env.py │ │ ├── observation.py │ │ └── action_space.py │ ├── action_space.py │ ├── diagnostics.py │ ├── tests │ │ ├── test_joint.py │ │ └── test_time_limit.py │ ├── vision.py │ ├── timer.py │ ├── render.py │ ├── time_limit.py │ ├── __init__.py │ ├── blocking_reset.py │ ├── joint.py │ ├── vectorize.py │ ├── multiprocessing_env.py │ ├── monitoring.py │ ├── gym_core_sync.py │ ├── gym_core.py │ └── throttle.py ├── vectorized │ ├── __init__.py │ ├── tests │ │ └── test_monitoring.py │ ├── core.py │ └── vectorize_filter.py ├── error.py ├── rewarder │ ├── __init__.py │ ├── connection_timer.py │ ├── merge.py │ ├── tests │ │ └── test_reward_buffer.py │ └── env_status.py ├── configuration.py ├── twisty.py ├── scoreboard │ └── 
__init__.py └── utils │ ├── display.py │ └── __init__.py ├── .dockerignore ├── doc ├── dusk-drive.png └── env_semantics.rst ├── example ├── starter-cluster │ └── starter-cluster-requirements.txt ├── random-agent │ └── random-agent.py ├── recorders │ ├── vnc_recorder.py │ ├── reward_recorder.py │ └── botaction_recorder.py └── system-diagnostics │ └── system_diagnostics_logger.py ├── .gitignore ├── test.dockerfile ├── ISSUE_TEMPLATE ├── tox.ini ├── CODE_OF_CONDUCT.rst ├── Makefile ├── setup.py ├── LICENSE ├── Dockerfile ├── .travis.yml └── tests └── functional ├── test_envs.py └── test_core_envs_semantics.py /universe/envs/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /universe/remotes/compose/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /universe/vncdriver/vendor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /universe/runtimes/.agignore: -------------------------------------------------------------------------------- 1 | flashgames.json 2 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | vncdriver/logs 2 | .git 3 | *.pyc 4 | -------------------------------------------------------------------------------- /universe/vncdriver/error.py: -------------------------------------------------------------------------------- 1 | class Error(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /doc/dusk-drive.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openai/universe/HEAD/doc/dusk-drive.png -------------------------------------------------------------------------------- /universe/kube/__init__.py: -------------------------------------------------------------------------------- 1 | from universe.kube.discovery import discover, discover_batches 2 | -------------------------------------------------------------------------------- /universe/spaces/diagnostics.py: -------------------------------------------------------------------------------- 1 | class DiagnosticEvent(object): 2 | pass 3 | 4 | PeekReward = DiagnosticEvent() 5 | -------------------------------------------------------------------------------- /universe/envs/tests/dusk-drive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/universe/HEAD/universe/envs/tests/dusk-drive.png -------------------------------------------------------------------------------- /universe/runtimes.yml: -------------------------------------------------------------------------------- 1 | flashgames: 2 | tag: 0.20.28 3 | gym-core: 4 | tag: 0.20.6 5 | world-of-bits: 6 | tag: 0.20.0 7 | -------------------------------------------------------------------------------- /universe/vncdriver/screen/base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | class Screen(object): 7 | pass 8 | -------------------------------------------------------------------------------- /example/starter-cluster/starter-cluster-requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.4.2 2 | click>=6.6 3 | docker-py==1.10.6 4 | PyYAML>=3.12 5 | universe>=0.1.0 6 | docker-compose>=1.9.0 7 | -------------------------------------------------------------------------------- /universe/envs/vnc_core_env/__init__.py: 
-------------------------------------------------------------------------------- 1 | from universe.envs.vnc_core_env.vnc_core_env import GymCoreEnv, GymCoreSyncEnv 2 | from universe.envs.vnc_core_env.translator import AtariTranslator, CartPoleTranslator 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .ropeproject 3 | *.pyc 4 | tags 5 | *.swo 6 | *.swp 7 | *.sqlite 8 | upload/ 9 | venv 10 | dist 11 | *.egg-info 12 | .idea 13 | /logs 14 | /dist 15 | build/ 16 | /.gitfiles 17 | /.tox 18 | /.cache 19 | -------------------------------------------------------------------------------- /universe/envs/vnc_internet.py: -------------------------------------------------------------------------------- 1 | from universe.envs import vnc_env 2 | 3 | class InternetEnv(vnc_env.VNCEnv): 4 | def __init__(self): 5 | super(InternetEnv, self).__init__() 6 | self._probe_key = 0x60 # backtick ` 7 | -------------------------------------------------------------------------------- /universe/envs/vnc_flashgames.py: -------------------------------------------------------------------------------- 1 | from universe.envs import vnc_env 2 | 3 | class FlashgamesEnv(vnc_env.VNCEnv): 4 | def __init__(self): 5 | super(FlashgamesEnv, self).__init__() 6 | self._probe_key = 0x60 # backtick ` 7 | -------------------------------------------------------------------------------- /universe/remotes/__init__.py: -------------------------------------------------------------------------------- 1 | from universe.remotes.hardcoded_addresses import HardcodedAddresses 2 | from universe.remotes.allocator_remote import AllocatorManager 3 | from universe.remotes.docker_remote import DockerManager 4 | from universe.remotes.build import build 5 | -------------------------------------------------------------------------------- /universe/spaces/vnc_observation_space.py: 
-------------------------------------------------------------------------------- 1 | import gym 2 | 3 | class VNCObservationSpace(gym.Space): 4 | # For now, we leave the VNC ObservationSpace wide open, since 5 | # there isn't much use-case for this object. 6 | def contains(self, x): 7 | return True 8 | -------------------------------------------------------------------------------- /universe/wrappers/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | from universe.wrappers.experimental.action_space import SafeActionSpace, SoftmaxClickMouse 2 | from universe.wrappers.experimental.observation import CropObservations 3 | from universe.wrappers.experimental.random_env import RandomEnv 4 | -------------------------------------------------------------------------------- /universe/vectorized/__init__.py: -------------------------------------------------------------------------------- 1 | from universe.vectorized.core import Env, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper 2 | from universe.vectorized.multiprocessing_env import MultiprocessingEnv 3 | from universe.vectorized.vectorize_filter import Filter, VectorizeFilter 4 | -------------------------------------------------------------------------------- /universe/error.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class Error(Exception): 4 | pass 5 | 6 | class RPCError(Error): 7 | pass 8 | 9 | class ConnectionError(Error): 10 | pass 11 | 12 | class TimeoutError(Error): 13 | pass 14 | 15 | class AuthenticationError(Error): 16 | pass 17 | -------------------------------------------------------------------------------- /universe/vncdriver/screen/__init__.py: -------------------------------------------------------------------------------- 1 | from universe.vncdriver.screen.base import Screen 2 | from universe.vncdriver.screen.numpy_screen import NumpyScreen 3 | from 
universe.vncdriver.screen.pyglet_screen import PygletScreen 4 | from universe.vncdriver.screen.screen_buffer import ScreenBuffer 5 | -------------------------------------------------------------------------------- /test.dockerfile: -------------------------------------------------------------------------------- 1 | FROM quay.io/openai/universe 2 | 3 | RUN pip install tox 4 | 5 | # Upload our actual code 6 | WORKDIR /usr/local/universe/ 7 | COPY . ./ 8 | 9 | # Run tox. Keep printing so Travis knows we're alive. 10 | CMD ["bash", "-c", "( while true; do echo '.'; sleep 60; done ) & tox"] 11 | -------------------------------------------------------------------------------- /universe/vncdriver/README.md: -------------------------------------------------------------------------------- 1 | # Python VNC driver implementation 2 | 3 | This Python VNC driver is using an older API, and needs a small amount 4 | of work to once again become a good backend. We haven't bothered with 5 | this since the Go driver is much faster. We would take a pull request 6 | to fix it though! 
7 | -------------------------------------------------------------------------------- /universe/rewarder/__init__.py: -------------------------------------------------------------------------------- 1 | from universe.rewarder.rewarder_session import RewarderSession 2 | from universe.rewarder.env_status import EnvStatus, compare_ids 3 | from universe.rewarder.merge import merge_n, merge_infos, merge_reward_n, merge_observation_n 4 | from universe.rewarder.reward_buffer import RewardBuffer 5 | -------------------------------------------------------------------------------- /universe/vncdriver/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from universe.vncdriver.vnc_session import VNCSession 4 | from universe.vncdriver.vnc_client import client_factory 5 | from universe.vncdriver.screen import NumpyScreen, PygletScreen 6 | 7 | logger = logging.getLogger(__name__) 8 | logger.setLevel(logging.INFO) 9 | -------------------------------------------------------------------------------- /universe/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from universe.spaces.hardcoded import Hardcoded 2 | from universe.spaces.vnc_action_space import VNCActionSpace 3 | from universe.spaces.vnc_event import VNCEvent, KeyEvent, PointerEvent 4 | from universe.spaces.vnc_observation_space import VNCObservationSpace 5 | 6 | from universe.spaces.diagnostics import PeekReward 7 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE: -------------------------------------------------------------------------------- 1 | 2 | (First, please check https://github.com/openai/universe/wiki/Solutions-to-common-problems for solutions to many common problems) 3 | 4 | ### Expected behavior 5 | 6 | ### Actual behavior 7 | 8 | ### Versions 9 | Please include the result of running 10 | ``` 11 | $ uname -a ; python --version; pip show 
universe gym tensorflow numpy go-vncdriver Pillow 12 | ``` 13 | -------------------------------------------------------------------------------- /universe/envs/vnc_gtav.py: -------------------------------------------------------------------------------- 1 | from universe.envs import vnc_env 2 | from universe.spaces.joystick_action_space import JoystickActionSpace 3 | 4 | 5 | class GTAVEnv(vnc_env.VNCEnv): 6 | def __init__(self): 7 | super(GTAVEnv, self).__init__() 8 | self.action_space = JoystickActionSpace(axis_x=True, axis_z=True) 9 | self._send_actions_over_websockets = True 10 | self._skip_network_calibration = True 11 | 12 | -------------------------------------------------------------------------------- /universe/spaces/hardcoded.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import prng 2 | 3 | class Hardcoded(object): 4 | def __init__(self, actions): 5 | self.actions = actions 6 | 7 | def contains(self, action): 8 | return action in self.actions 9 | 10 | def sample(self): 11 | i = prng.np_random.randint(len(self.actions)) 12 | return self.actions[i] 13 | 14 | def __getitem__(self, i): 15 | return self.actions[i] 16 | -------------------------------------------------------------------------------- /universe/configuration.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from gym import configuration 3 | 4 | universe_logger = logging.getLogger('universe') 5 | universe_logger.setLevel(logging.INFO) 6 | 7 | extra_logger = logging.getLogger('universe.extra') 8 | extra_logger.setLevel(logging.INFO) 9 | 10 | if hasattr(configuration, '_extra_loggers'): 11 | configuration._extra_loggers.append(universe_logger) 12 | configuration._extra_loggers.append(extra_logger) 13 | -------------------------------------------------------------------------------- /universe/envs/vnc_wog.py: 
-------------------------------------------------------------------------------- 1 | from universe.envs import vnc_env 2 | from universe.spaces import VNCActionSpace 3 | 4 | 5 | class WorldOfGooEnv(vnc_env.VNCEnv): 6 | def __init__(self): 7 | super(WorldOfGooEnv, self).__init__() 8 | # TODO: set action space screen shape to match 9 | # HACK: empty keys list fails for some weird reason, give it an 'a' 10 | self.action_space = VNCActionSpace(keys=['a'], buttonmasks=[1]) 11 | -------------------------------------------------------------------------------- /universe/envs/__init__.py: -------------------------------------------------------------------------------- 1 | import universe.envs.vnc_env 2 | from universe.envs.vnc_env import VNCEnv 3 | from universe.envs.vnc_core_env import GymCoreEnv, GymCoreSyncEnv 4 | from universe.envs.vnc_flashgames import FlashgamesEnv 5 | from universe.envs.vnc_internet import InternetEnv 6 | from universe.envs.vnc_starcraft import StarCraftEnv 7 | from universe.envs.vnc_gtav import GTAVEnv 8 | from universe.envs.vnc_wog import WorldOfGooEnv 9 | from universe.envs.dummy_vnc_env import DummyVNCEnv 10 | -------------------------------------------------------------------------------- /universe/remotes/compose/signals.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import unicode_literals 3 | 4 | import signal 5 | 6 | 7 | class ShutdownException(Exception): 8 | pass 9 | 10 | 11 | def shutdown(signal, frame): 12 | raise ShutdownException() 13 | 14 | 15 | def set_signal_handler(handler): 16 | signal.signal(signal.SIGINT, handler) 17 | signal.signal(signal.SIGTERM, handler) 18 | 19 | 20 | def set_signal_handler_to_shutdown(): 21 | set_signal_handler(shutdown) 22 | -------------------------------------------------------------------------------- /universe/remotes/remote.py: 
-------------------------------------------------------------------------------- 1 | class Remote(object): 2 | def __init__(self, handle, vnc_address, vnc_password, rewarder_address, rewarder_password, name=None): 3 | self.name = name 4 | self.handle = handle 5 | self.vnc_address = vnc_address 6 | self.vnc_password = vnc_password 7 | self.rewarder_address = rewarder_address 8 | self.rewarder_password = rewarder_password 9 | 10 | def __str__(self): 11 | return 'Remote<{}:{}>'.format(self.handle, self.name) 12 | 13 | def __repr__(self): 14 | return str(self) 15 | -------------------------------------------------------------------------------- /universe/wrappers/action_space.py: -------------------------------------------------------------------------------- 1 | class SoftmaxClickMouse(): 2 | def __init__(self, *args, **kwargs): 3 | raise DeprecationWarning('DEPRECATION WARNING: wrappers.SoftmaxClickMouse has been moved to wrappers.experimental.action_space.SoftmaxClickMouse as of 2017-02-08.') 4 | 5 | 6 | class SafeActionSpace(): 7 | def __init__(self, *args, **kwargs): 8 | raise DeprecationWarning('DEPRECATION WARNING: wrappers.SafeActionSpace has been moved to ' 9 | 'wrappers.experimental.action_space.SafeActionSpace as of 2017-01-07. ' 10 | 'Using legacy wrappers.SafeActionSpace will soon be removed') 11 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Tox (http://tox.testrun.org/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory.
5 | 6 | [tox] 7 | envlist = py27, py35 8 | skipsdist=True 9 | 10 | [testenv] 11 | passenv=DISPLAY DOCKER_USERNAME DOCKER_PASSWORD FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES TRAVIS* 12 | deps = 13 | pytest 14 | gym[atari]<0.9 15 | docker-py==1.10.3 16 | Pillow 17 | autobahn 18 | twisted 19 | ujson 20 | boto 21 | commands = 22 | pip install -e /usr/local/universe 23 | pytest {posargs} 24 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | OpenAI is dedicated to providing a harassment-free experience for 2 | everyone, regardless of gender, gender identity and expression, sexual 3 | orientation, disability, physical appearance, body size, age, race, or 4 | religion. We do not tolerate harassment of participants in any form. 5 | 6 | This code of conduct applies to all OpenAI spaces both online and 7 | off. Anyone who violates this code of conduct may be sanctioned or 8 | expelled from these spaces at the discretion of the OpenAI team. 9 | 10 | We may add additional rules over time, which will be made clearly 11 | available to participants. Participants are responsible for knowing 12 | and abiding by these rules. 13 | -------------------------------------------------------------------------------- /universe/wrappers/diagnostics.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import six 3 | from universe import pyprofile, vectorized 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | # Not used in core; but used in play_flashgames 8 | class Diagnostics(vectorized.Wrapper): 9 | 10 | def _step(self, action_n): 11 | observation_n, reward_n, done_n, info = self.env.step(action_n) 12 | # We want this to be above Mask, so we know whether or not a 13 | # particular index is resetting. 
14 | if self.unwrapped.diagnostics: 15 | with pyprofile.push('vnc_env.diagnostics.add_metadata'): 16 | self.unwrapped.diagnostics.add_metadata(observation_n, info['n']) 17 | return observation_n, reward_n, done_n, info 18 | -------------------------------------------------------------------------------- /universe/wrappers/tests/test_joint.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import universe 3 | from universe import wrappers 4 | 5 | def test_joint(): 6 | env1 = gym.make('test.DummyVNCEnv-v0') 7 | env2 = gym.make('test.DummyVNCEnv-v0') 8 | env1.configure(_n=3) 9 | env2.configure(_n=3) 10 | for reward_buffer in [env1._reward_buffers[0], env2._reward_buffers[0]]: 11 | reward_buffer.set_env_info('running', 'test.DummyVNCEnv-v0', '1', 60) 12 | reward_buffer.reset('1') 13 | reward_buffer.push('1', 10, False, {}) 14 | 15 | env = wrappers.Joint([env1, env2]) 16 | assert env.n == 6 17 | observation_n = env.reset() 18 | assert observation_n == [None] * 6 19 | 20 | observation_n, reward_n, done_n, info = env.step([[] for _ in range(env.n)]) 21 | assert reward_n == [10.0, 0.0, 0.0, 10.0, 0.0, 0.0] 22 | assert done_n == [False] * 6 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | upload: 2 | rm -rf dist 3 | python setup.py sdist 4 | twine upload dist/* 5 | 6 | test: 7 | find . -name '*.pyc' -delete 8 | docker build -f test.dockerfile -t quay.io/openai/universe:test . 9 | docker run -v /usr/bin/docker:/usr/bin/docker -v /root/.docker:/root/.docker -v /var/run/docker.sock:/var/run/docker.sock --net=host quay.io/openai/universe:test 10 | 11 | build: 12 | find . -name '*.pyc' -delete 13 | docker build -t quay.io/openai/universe . 14 | docker build -f test.dockerfile -t quay.io/openai/universe:test . 15 | 16 | push: 17 | find . 
-name '*.pyc' -delete 18 | docker build -t quay.io/openai/universe . 19 | docker build -f test.dockerfile -t quay.io/openai/universe:test . 20 | 21 | docker push quay.io/openai/universe 22 | docker push quay.io/openai/universe:test 23 | 24 | test-push: 25 | docker build -f test.dockerfile -t quay.io/openai/universe:test . 26 | docker push quay.io/openai/universe:test 27 | -------------------------------------------------------------------------------- /universe/wrappers/vision.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from universe import vectorized 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | class Vision(vectorized.Wrapper): 7 | """ 8 | At present, an observation from a vectorized universe environment returns a list of 9 | dicts. Each dict contains input data for each modality. Modalities include 'vision' 10 | and 'text', and it is possible to add other modalities in the future (such as 'audio'). 11 | 12 | The Vision wrapper extracts the vision modality and discards all others. This is convenient 13 | when we only care about the visual input. 14 | """ 15 | 16 | def _reset(self): 17 | observation_n = self.env.reset() 18 | return [ob['vision'] if ob is not None else ob for ob in observation_n] 19 | 20 | def _step(self, action_n): 21 | observation_n, reward_n, done_n, info_n = self.env.step(action_n) 22 | observation_n = [ob['vision'] if ob is not None else ob for ob in observation_n] 23 | return observation_n, reward_n, done_n, info_n 24 | -------------------------------------------------------------------------------- /universe/wrappers/experimental/random_env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from universe import vectorized 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | class RandomEnv(vectorized.Wrapper): 7 | ''' 8 | Randomly sample from a list of env_ids between episodes. 
9 | 10 | Passes a list of env_ids to configure. When done=True, calls env.reset() 11 | to sample from the list. 12 | ''' 13 | def __init__(self, env, env_ids): 14 | super(RandomEnv, self).__init__(env) 15 | self.env_ids = env_ids 16 | 17 | def configure(self, **kwargs): 18 | super(RandomEnv, self).configure(sample_env_ids=self.env_ids, **kwargs) 19 | 20 | def _reset(self): 21 | observation_n = self.env.reset() 22 | return [ob['vision'] if ob is not None else ob for ob in observation_n] 23 | 24 | def _step(self, action_n): 25 | assert self.n == 1 26 | observation, reward, done, info = self.env.step(action_n) 27 | if any(done): 28 | self.env.reset() 29 | return observation, reward, done, info 30 | -------------------------------------------------------------------------------- /universe/wrappers/timer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | from universe import pyprofile, vectorized 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | class Timer(vectorized.Wrapper): 8 | """ 9 | Calculate how much time was spent actually doing work. Display result 10 | via pyprofile. 
11 | """ 12 | 13 | def configure(self, **kwargs): 14 | self.env.configure(**kwargs) 15 | 16 | def _reset(self): 17 | with pyprofile.push('vnc_env.Timer.reset'): 18 | return self.env.reset() 19 | 20 | def _step(self, action_n): 21 | start = time.time() 22 | with pyprofile.push('vnc_env.Timer.step'): 23 | observation_n, reward_n, done_n, info = self.env.step(action_n) 24 | 25 | # Calculate how much time was spent actually doing work 26 | sleep = info.get('stats.throttle.sleep') 27 | if sleep is None or sleep < 0: 28 | sleep = 0 29 | pyprofile.timing('vnc_env.Timer.step.excluding_sleep', time.time() - start - sleep) 30 | return observation_n, reward_n, done_n, info 31 | -------------------------------------------------------------------------------- /universe/remotes/compose/colors.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import unicode_literals 3 | NAMES = [ 4 | 'grey', 5 | 'red', 6 | 'green', 7 | 'yellow', 8 | 'blue', 9 | 'magenta', 10 | 'cyan', 11 | 'white' 12 | ] 13 | 14 | 15 | def get_pairs(): 16 | for i, name in enumerate(NAMES): 17 | yield(name, str(30 + i)) 18 | yield('intense_' + name, str(30 + i) + ';1') 19 | 20 | 21 | def ansi(code): 22 | return '\033[{0}m'.format(code) 23 | 24 | 25 | def ansi_color(code, s): 26 | return '{0}{1}{2}'.format(ansi(code), s, ansi(0)) 27 | 28 | 29 | def make_color_fn(code): 30 | return lambda s: ansi_color(code, s) 31 | 32 | 33 | for (name, code) in get_pairs(): 34 | globals()[name] = make_color_fn(code) 35 | 36 | 37 | def rainbow(): 38 | cs = ['cyan', 'yellow', 'green', 'magenta', 'red', 'blue', 39 | 'intense_cyan', 'intense_yellow', 'intense_green', 40 | 'intense_magenta', 'intense_red', 'intense_blue'] 41 | 42 | for c in cs: 43 | yield globals()[c] 44 | -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='universe', 4 | version='0.21.5', 5 | description="Universe: a software platform for measuring and training an AI's general intelligence across the world's supply of games, websites and other applications.", 6 | url='https://github.com/openai/universe', 7 | author='OpenAI', 8 | author_email='universe@openai.com', 9 | packages=[package for package in find_packages() 10 | if package.startswith('universe')], 11 | install_requires=[ 12 | 'autobahn>=0.16.0', 13 | 'docker-py==1.10.3', 14 | 'docker-pycreds==0.2.1', 15 | 'fastzbarlight>=0.0.13', 16 | 'go-vncdriver>=0.4.8', 17 | 'gym>=0.8.1', 18 | 'Pillow>=3.3.0', 19 | 'PyYAML>=3.12', 20 | 'six>=1.10.0', 21 | 'twisted>=16.5.0', 22 | 'ujson>=1.35', 23 | ], 24 | package_data={'universe': ['runtimes.yml', 'runtimes/flashgames.json']}, 25 | tests_require=['pytest'], 26 | extras_require={ 27 | 'atari': 'gym[atari]', 28 | } 29 | ) 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2016 OpenAI (http://openai.com) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /universe/spaces/joystick_event.py: -------------------------------------------------------------------------------- 1 | class JoystickEvent(object): 2 | pass 3 | 4 | 5 | class JoystickAxisEvent(JoystickEvent): 6 | def __init__(self, amount): 7 | self.amount = float(amount) 8 | 9 | def __repr__(self): 10 | return str(type(self)) + '<amount={}>'.format(self.amount) 11 | 12 | def __str__(self): 13 | return repr(self) 14 | 15 | def __hash__(self): 16 | return self.amount.__hash__() 17 | 18 | def __eq__(self, other): 19 | return type(other) == type(self) and \ 20 | other.amount == self.amount 21 | 22 | def compile(self): 23 | return type(self).__name__, self.amount 24 | 25 | 26 | class JoystickAxisXEvent(JoystickAxisEvent): 27 | pass 28 | 29 | 30 | class JoystickAxisYEvent(JoystickAxisEvent): 31 | pass 32 | 33 | 34 | class JoystickAxisZEvent(JoystickAxisEvent): 35 | pass 36 | 37 | 38 | class JoystickAxisRxEvent(JoystickAxisEvent): 39 | pass 40 | 41 | 42 | class JoystickAxisRyEvent(JoystickAxisEvent): 43 | pass 44 | 45 | 46 | class JoystickAxisRzEvent(JoystickAxisEvent): 47 | pass 48 | 49 | 50 | class JoystickSlider0Event(JoystickAxisEvent): 51 | pass 52 | 53 | 54 | class JoystickSlider1Event(JoystickAxisEvent): 55 | pass 56 | -------------------------------------------------------------------------------- /universe/vncdriver/auth.py:
-------------------------------------------------------------------------------- 1 | import six 2 | import uuid 3 | 4 | from universe import utils 5 | from universe.vncdriver.vendor import pydes 6 | 7 | class RFBDes(pydes.des): 8 | def setKey(self, key): 9 | key = key.encode('ascii') 10 | 11 | newkey = [] 12 | for ki in range(len(key)): 13 | if six.PY2: 14 | bsrc = ord(key[ki]) 15 | else: 16 | bsrc = key[ki] 17 | 18 | # Reverse the bits 19 | btgt = 0 20 | for i in range(8): 21 | if bsrc & (1 << i): 22 | btgt = btgt | (1 << 7-i) 23 | 24 | if six.PY2: 25 | newkey.append(chr(btgt)) 26 | else: 27 | newkey.append(btgt) 28 | 29 | super(RFBDes, self).setKey(newkey) 30 | 31 | def challenge(): 32 | length = 16 33 | buf = b'' 34 | while len(buf) < length: 35 | entropy = uuid.uuid4().bytes 36 | buf += entropy 37 | return buf[:length] 38 | 39 | def challenge_response(challenge, password=None): 40 | if password is None: 41 | password = utils.default_password() 42 | password += ((8 - len(password)) % 8) * '\0' # pad to multiple of 8 bytes 43 | des = RFBDes(password) 44 | return des.encrypt(challenge) 45 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y libav-tools \ 5 | python3-numpy \ 6 | python3-scipy \ 7 | python3-setuptools \ 8 | python3-pip \ 9 | libpq-dev \ 10 | libjpeg-dev \ 11 | curl \ 12 | cmake \ 13 | swig \ 14 | python3-opengl \ 15 | libboost-all-dev \ 16 | libsdl2-dev \ 17 | wget \ 18 | unzip \ 19 | git \ 20 | golang \ 21 | net-tools \ 22 | iptables \ 23 | libvncserver-dev \ 24 | software-properties-common \ 25 | && apt-get clean \ 26 | && rm -rf /var/lib/apt/lists/* 27 | 28 | RUN ln -sf /usr/bin/pip3 /usr/local/bin/pip \ 29 | && ln -sf /usr/bin/python3 /usr/local/bin/python \ 30 | && pip install -U pip 31 | 32 | # Install gym 33 | RUN pip install gym[all] 34 | 35 | 
# Get the faster VNC driver.
# The requirement is quoted on purpose: RUN uses the shell form, and an
# unquoted ">=0.4.0" is parsed as an output redirection. That silently drops
# the version constraint and creates a stray file named "=0.4.0".
RUN pip install 'go-vncdriver>=0.4.0'

# Install pytest (for running test cases)
RUN pip install pytest

# Force the container to use the go vnc driver
ENV UNIVERSE_VNCDRIVER='go'

WORKDIR /usr/local/universe/

# Cachebusting
COPY ./setup.py ./
COPY ./tox.ini ./

RUN pip install -e .

# Upload our actual code
COPY . ./

# Just in case any python cache files were carried over from the source directory, remove them
RUN py3clean .
-------------------------------------------------------------------------------- /example/random-agent/random-agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import logging 4 | import sys 5 | 6 | import gym 7 | import universe # register the universe environments 8 | 9 | from universe import wrappers 10 | 11 | logger = logging.getLogger() 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser(description=None) 15 | parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.') 16 | args = parser.parse_args() 17 | 18 | if args.verbosity == 0: 19 | logger.setLevel(logging.INFO) 20 | elif args.verbosity >= 1: 21 | logger.setLevel(logging.DEBUG) 22 | 23 | 24 | env = gym.make('flashgames.NeonRace-v0') 25 | env.configure(remotes=1) # automatically creates a local docker container 26 | 27 | # Restrict the valid random actions. (Try removing this and see 28 | # what happens when the agent is given full control of the 29 | # keyboard/mouse.) 
# Load the runtime table (runtimes.yml lives one directory above this module).
# The registrations below only index the result as nested mappings
# (spec[<runtime>]['tag']), so the restricted safe loader is sufficient.
# yaml.load() without an explicit Loader is deprecated and is rejected outright
# by newer PyYAML releases, which would make importing this package fail.
with open(os.path.join(os.path.dirname(__file__), '../runtimes.yml')) as f:
    spec = yaml.safe_load(f)
def main():
    """Run the recording VNC proxy until the reactor stops.

    Parses the command line, configures a ``ServerFactory`` that speaks
    ``VNCProxyServer`` and forwards to ``--vnc-address``, then listens on
    ``--listen-address``. Returns 0 once ``reactor.run()`` returns.
    """
    arg_parser = argparse.ArgumentParser(description=None)
    arg_parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    arg_parser.add_argument('-l', '--listen-address', default='0.0.0.0:5899', help='Address to listen on')
    arg_parser.add_argument('-s', '--vnc-address', default='127.0.0.1:5900', help='Address of the VNC server to run on.')
    arg_parser.add_argument('-d', '--logfile-dir', default=None, help='Base directory to write logs for each connection')
    options = arg_parser.parse_args()

    # Any -v flag switches the root logger to DEBUG; none leaves it at INFO.
    if options.verbosity >= 1:
        logger.setLevel(logging.DEBUG)
    elif options.verbosity == 0:
        logger.setLevel(logging.INFO)

    proxy_factory = protocol.ServerFactory()
    proxy_factory.protocol = vnc_proxy_server.VNCProxyServer
    proxy_factory.vnc_address = 'tcp:{}'.format(options.vnc_address)
    proxy_factory.logfile_dir = options.logfile_dir
    proxy_factory.recorder_id = utils.random_alphanumeric().lower()

    # The listen address is "host:port"; the port must be an integer.
    host, port_text = options.listen_address.split(':')
    port = int(port_text)

    logger.info('Listening on %s:%s', host, port)
    reactor.listenTCP(port, proxy_factory, interface=host)
    reactor.run()
    return 0
def main():
    """Run the recording reward proxy until the reactor stops.

    Parses the command line, configures a websocket server factory that
    speaks ``RewardProxyServer`` and forwards to ``--rewarder-address``, then
    listens on ``--listen-address``. Returns 0 once ``reactor.run()`` returns.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-l', '--listen-address', default='0.0.0.0:15898', help='Address to listen on')
    parser.add_argument('-s', '--rewarder-address', default='127.0.0.1:15900', help='Address of the reward server to run on.')
    parser.add_argument('-d', '--logfile-dir', default=None, help='Base directory to write logs for each connection')
    args = parser.parse_args()

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    factory = websocket.WebSocketServerFactory()
    factory.protocol = reward_proxy_server.RewardProxyServer
    factory.rewarder_address = args.rewarder_address
    factory.logfile_dir = args.logfile_dir
    factory.setProtocolOptions(maxConnections=1)  # We only write reward logs to one place, so only allow one connection

    host, port = args.listen_address.split(':')
    port = int(port)
    logger.info('Listening on %s:%s', host, port)
    # Bind to the interface the user asked for. Previously the host part was
    # parsed and logged but never passed on, so the proxy listened on every
    # interface regardless of --listen-address. The default (0.0.0.0) keeps
    # the old behaviour.
    reactor.listenTCP(port, factory, interface=host)
    reactor.run()
    return 0
def build(client_id, remotes, runtime=None, start_timeout=None, **kwargs):
    """Create the remote manager described by ``remotes``.

    Args:
        client_id: Forwarded to ``AllocatorManager.from_remotes`` (only used
            for ``http(s)://`` remotes).
        remotes: Either an int / digit string (number of local docker
            remotes), a ``vnc://...`` string, or an ``http(s)://...``
            allocator URL.
        runtime: Runtime spec. Required for allocator remotes, where its
            ``id`` and the tag part of its ``image`` are used.
        start_timeout: Passed through to whichever manager is built.
        **kwargs: Optional ``reuse`` (docker), ``api_key`` and
            ``use_recorder_ports`` (allocator).

    Returns:
        ``(manager, n)`` for docker and allocator remotes. For ``vnc://``
        remotes, whatever ``HardcodedAddresses.build`` returns (presumably the
        same shape -- confirm against that class).

    Raises:
        error.Error: If ``remotes`` has the wrong type or an unknown form, or
            if an allocator URL is given without a runtime.
    """
    if isinstance(remotes, int):
        remotes = str(remotes)
    elif not isinstance(remotes, str):
        raise error.Error('remotes argument must be a string, got {} which is of type {}'.format(remotes, type(remotes)))

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    if re.search(r'^\d+$', remotes):  # an integer, like -r 20
        n = int(remotes)
        return DockerManager(
            runtime=runtime,
            start_timeout=start_timeout,
            reuse=kwargs.get('reuse', False),
            n=n,
        ), n
    elif remotes.startswith('vnc://'):
        return HardcodedAddresses.build(
            remotes,
            start_timeout=start_timeout)
    elif remotes.startswith(('http://', 'https://')):
        if runtime is None:
            raise error.Error('Must provide a runtime. HINT: try creating your env instance via gym.make("flashgames.DuskDrive-v0")')

        manager, n = AllocatorManager.from_remotes(
            client_id,
            remotes,
            runtime_id=runtime.id,
            # The tag is whatever follows the last ':' of the image name.
            runtime_tag=runtime.image.split(':')[-1],
            start_timeout=start_timeout,
            api_key=kwargs.get('api_key'),
            use_recorder_ports=kwargs.get('use_recorder_ports', False),
        )
        manager.start()
        return manager, n
    else:
        # The message now lists every scheme the branches above accept.
        raise error.Error('Invalid remotes: {!r}. Must be an integer or must start with vnc://, http:// or https://'.format(remotes))
22 | """ 23 | # If True and this is instantiated with a non-vectorized environment, 24 | # automatically wrap it with the Vectorize wrapper. 25 | autovectorize = True 26 | 27 | def __init__(self, env): 28 | super(Wrapper, self).__init__(env) 29 | if not env.metadata.get('runtime.vectorized'): 30 | if self.autovectorize: 31 | # Circular dependency :( 32 | from universe import wrappers 33 | env = wrappers.Vectorize(env) 34 | else: 35 | raise error.Error('This wrapper can only wrap vectorized envs (i.e. where env.metadata["runtime.vectorized"] = True), not {}. Set "self.autovectorize = True" to automatically add a Vectorize wrapper.'.format(env)) 36 | 37 | self.env = env 38 | 39 | @property 40 | def n(self): 41 | return self.env.n 42 | 43 | def configure(self, **kwargs): 44 | self.env.configure(**kwargs) 45 | 46 | class ObservationWrapper(Wrapper, gym.ObservationWrapper): 47 | pass 48 | 49 | class RewardWrapper(Wrapper, gym.RewardWrapper): 50 | pass 51 | 52 | class ActionWrapper(Wrapper, gym.ActionWrapper): 53 | pass 54 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | services: 5 | - docker 6 | before_install: 7 | - docker build -f test.dockerfile -t quay.io/openai/universe:test . 
8 | script: 9 | - docker run -v /usr/bin/docker:/usr/bin/docker -v /root/.docker:/root/.docker -v /var/run/docker.sock:/var/run/docker.sock --net=host quay.io/openai/universe:test 10 | notifications: 11 | slack: 12 | secure: HtkwTGU+cQbpQuRaMuC2ZcuaaJfUBEZxSaChkj74lFulHAc6g/Xj1ztzj/roR/kMl3dycYPl5QL5AkxPPD/x8BweOJmgabe9boPbU9+80tpa0ueZnt0q6vX23ZA7EcqIAOwQqHiaklxoCkSflpV2N9GP20yBf5YNneHWsbFc8RDuJmNsg8s+1sZIrT3aOcvAJmu8WrNVclKvnpH/qCtvkK6npXZvdMvGpQPT/uCYOyPcbURqelk7qzNpT0oJmkrutbkT3Hp03NRDEQgS47pTPMC5pklea5zDkyh++ETEMpXU75UgN3CURKhuf/oyq7JorG/lXQaz6HBYbcT9EhPVpTzPZEczk50VAp3RWWcN6NczJJ9rVL0h+bGZmcOlJz9igNl838ziL6nxMFO9W3psXQUoBvEDo+vXPDEOUxeBrtLqUN1vfQmMw7KKiGIimInWigW19WfVQhSt47+xKKmbvBKtQ/w8lCDlwO5h7QbApv6TiaGzxtzdJMAyhNOZE7KxqvtFCJgKL4ZfmVzziLlbdbr582Cc0wxvGLDC341+CqkYVv83oimM8Ks3wHRT/ABoO1uXOSsZniUU/+oU/mzyrhrkGNNSDCwdJ0mVEWRGTYZs26IcBIeYGsLJrv3J9ZgfiyD2Knl4/yVI0IbTs7qAzhBzsXvt9aH7kH7tXYZH9QQ= 13 | webhooks: 14 | urls: 15 | - https://hooks.zapier.com/hooks/catch/1711022/6ztmzh/ 16 | - https://hooks.zapier.com/hooks/catch/1711022/6zhc8p/ 17 | on_success: always 18 | on_failure: always 19 | after_success: 20 | - export BRANCH=$(if [ "$TRAVIS_PULL_REQUEST" == "false" ]; then echo $TRAVIS_BRANCH; else echo $TRAVIS_PULL_REQUEST_BRANCH; fi) 21 | - echo "TRAVIS_BRANCH=$TRAVIS_BRANCH, PR=$PR, BRANCH=$BRANCH" 22 | - docker login quay.io -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD" 23 | - docker tag quay.io/openai/universe:test quay.io/openai/universe-travis:passed-ci 24 | - if [ "$BRANCH" == "master" ]; then ( while true; do echo '.'; sleep 60; done ) & docker push quay.io/openai/universe-travis:passed-ci; fi # This repo is used by universe-envs to run integration test. 
def CropObservations(env):
    """Return ``env`` wrapped so observations contain only the game screen.

    Flashgames envs are cropped to the size recorded in the flashgames server
    registry, offset by (x=18, y=84). Atari envs served over VNC are cropped
    to 194x160. Any other env (including local atari) is returned unchanged.
    """
    tags = env.spec.tags
    if tags.get('flashgames', False):
        game = runtime_spec('flashgames').server_registry[env.spec.id]
        return _CropObservations(env, x=18, y=84, height=game["height"], width=game["width"])
    if tags.get('atari', False) and tags.get('vnc', False):
        return _CropObservations(env, height=194, width=160)
    # Unknown environment: nothing to crop.
    return env
class VectorizeFilter(core.Wrapper):
    """Apply one per-environment Filter instance to each slot of a vectorized env.

    The filters are built lazily on the first reset, one per remote, by
    calling ``filter_factory(*args, **kwargs)``.
    """

    autovectorize = False
    metadata = {
        'configure.required': True
    }

    def __init__(self, env, filter_factory, *args, **kwargs):
        super(VectorizeFilter, self).__init__(env)
        self.filter_factory = filter_factory
        self._args = args
        self._kwargs = kwargs
        # Populated by the first _reset().
        self.filter_n = None

    def _reset(self):
        """Reset the wrapped env and pass each observation through its filter."""
        if self.filter_n is None:
            self.filter_n = [
                self.filter_factory(*self._args, **self._kwargs)
                for _ in range(self.n)
            ]
        return [
            env_filter._after_reset(observation)
            for env_filter, observation in zip(self.filter_n, self.env.reset())
        ]

    def _step(self, action_n):
        """Step the wrapped env and pass each slot's result through its filter."""
        raw_observation_n, raw_reward_n, raw_done_n, raw_info = self.env.step(action_n)

        # Shallow copy so the per-slot list can be replaced without touching
        # the dict returned by the wrapped env.
        info = raw_info.copy()
        observation_n, reward_n, done_n, info_n = [], [], [], []
        info['n'] = info_n

        per_slot = zip(self.filter_n, raw_observation_n, raw_reward_n, raw_done_n, raw_info['n'])
        for env_filter, observation, reward, done, slot_info in per_slot:
            filtered = env_filter._after_step(observation, reward, done, slot_info)
            observation, reward, done, slot_info = filtered
            observation_n.append(observation)
            reward_n.append(reward)
            done_n.append(done)
            info_n.append(slot_info)
        return observation_n, reward_n, done_n, info

    def __str__(self):
        return '<{}[{}]{}>'.format(type(self).__name__, self.filter_factory, self.env)
class FBSWriter(object):
    """Append-only writer for the custom "FBS 001.002" recording format.

    File layout, as produced by this class:

        FBS 001.002\\n
        {"start": <time.time() of first write>}\\n     (written on first write)
        [4-byte big-endian length, data, 4-byte big-endian ms since start]...
        \\0\\0\\0\\0 {"stop": <time.time() at close>}\\n  (written on close)
    """

    def __init__(self, path):
        # Stay "closed" until setup has fully succeeded, so that __del__ on a
        # half-constructed instance (e.g. open() raised) is a harmless no-op
        # instead of an AttributeError from close().
        self._closed = True

        self.start = None
        self.stop = None

        # Open before registering: a failed open must not leave a dangling
        # registration in fbs_closer.
        self.file = open(path, 'wb')
        self._id = fbs_closer.register(self)
        self._closed = False

        # custom format: exactly the same as FBS 001.000 except:
        #
        # FBS 001.002
        # {line-of-json}
        # [length, data, timestamp]...
        # \0\0\0\0 {line-of-json}
        self.file.write(b'FBS 001.002\n')

    def write(self, data):
        """Append one record; empty ``data`` is ignored.

        Record format: 4-byte length, the data, then a 4-byte timestamp
        (milliseconds since the first record).
        """
        if not data:
            return

        if self.start is not None:
            delta = int(1000 * (time.time() - self.start))
        else:
            # First record: it defines the time origin and triggers the header.
            delta = 0
            self.start = time.time()

            # Write metadata header
            self.file.write(json.dumps({'start': self.start}).encode('utf-8'))
            self.file.write(b'\n')

        length = struct.pack('!I', len(data))
        self.file.write(length)
        self.file.write(data)

        delta = struct.pack('!I', delta)
        self.file.write(delta)

    def _write_metadata(self):
        """Write the trailer: a zero length marker followed by the stop time."""
        null = struct.pack('!I', 0)
        self.file.write(null)
        self.file.write(json.dumps({'stop': self.stop}).encode('utf-8'))
        self.file.write(b'\n')

    def close(self):
        """Write the trailer and close the file. Safe to call repeatedly."""
        if self._closed:
            return
        self._closed = True

        fbs_closer.unregister(self._id)
        self.stop = time.time()
        try:
            self._write_metadata()
        finally:
            # Release the handle even if writing the trailer fails.
            self.file.close()

    def __del__(self):
        self.close()
class FBSReader(object):
    """Iterator over the records of an "FBS 001.002" recording.

    Each iteration yields ``(data, timestamp)`` where ``timestamp`` is the
    header's ``start`` value plus the record's millisecond offset, in seconds.
    Iteration ends at the zero-length trailer marker or at a truncated record;
    in both cases the file is closed.
    """

    def __init__(self, path):
        self.file = open(path, 'rb')
        try:
            version = self.file.read(12)
            if version != b'FBS 001.002\n':
                raise InvalidFBSFileError('Unrecognized FBS version: {}'.format(version))

            # The line after the version is a JSON header carrying 'start'.
            header = json.loads(self.file.readline().decode('utf-8'))
            self.start = header['start']
        except Exception:
            # Do not leak the handle when the file turns out to be unusable.
            self.file.close()
            raise

    def __iter__(self):
        return self

    def read_safe(self, size=None):
        """
        We currently close our fbs files by killing them, so sometimes they end
        up with bad data at the end. Close our reader if we expect `size` bytes
        and get fewer.

        This is a hack and should be removed when we cleanly close our
        connections in fbs_writer.

        https://github.com/openai/universe-envs/issues/41
        """
        chunk = self.file.read(size)
        if len(chunk) != size:
            # We unexpectedly got to the end of the file
            self.close()
            raise StopIteration
        return chunk

    def next(self):
        # Python 2 iterator protocol.
        return self.__next__()

    def __next__(self):
        if self.file.closed:
            # Already exhausted (or closed by the caller): keep signalling the
            # end instead of failing with ValueError on the closed file.
            raise StopIteration

        # A short read here (file with no trailer) is handled by read_safe.
        length_str = self.read_safe(4)
        (length,) = struct.unpack('!I', length_str)

        if length == 0:
            # Reached the trailer marker
            self.close()
            raise StopIteration()

        data = self.read_safe(length)
        timestamp_str = self.read_safe(4)
        (timestamp,) = struct.unpack('!I', timestamp_str)

        # Offsets are stored in milliseconds relative to the header's start.
        return data, self.start + timestamp/1000.

    def close(self):
        self.file.close()
class DiagnosticsLogger(object):
    """Periodically print Chrome CPU usage and per-core CPU percent as JSON lines."""

    def __init__(self, interval=5):
        self.interval = interval  # seconds slept between samples
        self.last_cpu_times = {}  # pid -> cpu times seen in the previous sample

    def run(self):
        """Print one JSON sample every ``interval`` seconds, forever."""
        while True:
            cpu_times, chrome_reset = self.cpu_times()
            print(json.dumps({
                'time': time.time(),
                'cpu_times': cpu_times,
                'cpu_percent': psutil.cpu_percent(percpu=True),
                'chrome_reset': chrome_reset,
            }), flush=True)
            # NOTE(review): this attribute is never read inside this class.
            self.chrome_reset = False
            time.sleep(self.interval)

    def get_chrome_procs(self):
        """Return the processes currently named 'chrome'."""
        def is_chrome(proc):
            try:
                return proc.name() == 'chrome'
            except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
                # Processes can exit (or be unreadable) between being listed
                # by process_iter() and being inspected; they are simply not
                # chrome as far as this logger is concerned.
                return False
        return [p for p in psutil.process_iter() if is_chrome(p)]

    def cpu_times(self):
        ''' return {pid: {'user': 0.0, 'sys': 0.0}}, chrome_reset '''
        chrome_procs = self.get_chrome_procs()
        new_pids = {p.pid for p in chrome_procs}
        old_pids = set(self.last_cpu_times)
        try:
            cpu_times = {p.pid: p.cpu_times() for p in chrome_procs}
        except psutil.NoSuchProcess:
            # Chrome restarted since fetching the new pids above. Better luck next time.
            return {}, True
        if new_pids != old_pids:
            # We don't know when the Chrome procs were restarted, so don't
            # return elapsed time until next run.
            self.last_cpu_times = cpu_times
            return {}, True
        # Same chrome pids as last run: measure the elapsed cpu times.
        # Index 0 is user time and index 1 is system time; the difference is
        # divided by the sampling interval.
        cpu_times_diff = {}
        for pid, current in cpu_times.items():
            previous = self.last_cpu_times[pid]
            cpu_times_diff[pid] = {
                'user': (current[0] - previous[0]) / self.interval,
                'sys': (current[1] - previous[1]) / self.interval,
            }
        self.last_cpu_times = cpu_times
        return cpu_times_diff, False
def wrap(env):
    """Return ``env`` wrapped in Throttle, then Render, then Timer (outermost)."""
    throttled = Throttle(env)
    rendered = Render(throttled)
    return Timer(rendered)


def WrappedVNCEnv():
    """Build a default ``VNCEnv`` with the standard wrapper stack."""
    base = envs.VNCEnv()
    return wrap(base)


def WrappedGymCoreEnv(gym_core_id, fps=None, rewarder_observation=False):
    """Build a wrapped ``VNCEnv`` for a gym-core environment.

    When ``rewarder_observation`` is true the result is additionally wrapped
    in ``GymCoreObservation`` for ``gym_core_id``.
    """
    # Don't need to store the ID on the instance; it'll be retrieved
    # directly from the spec
    wrapped = wrap(envs.VNCEnv(fps=fps))
    if not rewarder_observation:
        return wrapped
    return GymCoreObservation(wrapped, gym_core_id=gym_core_id)


def WrappedGymCoreSyncEnv(gym_core_id, fps=60, rewarder_observation=False):
    """Build a synchronous gym-core env on top of a blocking-reset ``VNCEnv``.

    With ``rewarder_observation`` the env is wrapped in ``GymCoreObservation``;
    otherwise Atari entry points are wrapped in ``CropAtari``.
    """
    spec = gym.spec(gym_core_id)
    blocking = BlockingReset(wrap(envs.VNCEnv(fps=fps)))
    env = gym_core_sync.GymCoreSync(blocking)
    if rewarder_observation:
        return GymCoreObservation(env, gym_core_id=gym_core_id)
    if spec._entry_point.startswith('gym.envs.atari:'):
        return CropAtari(env)
    return env


def WrappedFlashgamesEnv():
    """Build a wrapped ``VNCEnv`` that uses the backtick key as its probe key."""
    probe_event = spaces.KeyEvent.by_name('`')
    return wrap(envs.VNCEnv(probe_key=probe_event.key))


def WrappedInternetEnv(*args, **kwargs):
    """Build a wrapped ``InternetEnv``; all arguments are forwarded."""
    base = envs.InternetEnv(*args, **kwargs)
    return wrap(base)


def WrappedStarCraftEnv(*args, **kwargs):
    """Build a wrapped ``StarCraftEnv``; all arguments are forwarded."""
    base = envs.StarCraftEnv(*args, **kwargs)
    return wrap(base)


def WrappedGTAVEnv(*args, **kwargs):
    """Build a wrapped ``GTAVEnv``; all arguments are forwarded."""
    base = envs.GTAVEnv(*args, **kwargs)
    return wrap(base)


def WrappedWorldOfGooEnv(*args, **kwargs):
    """Build a wrapped ``WorldOfGooEnv``; all arguments are forwarded."""
    base = envs.WorldOfGooEnv(*args, **kwargs)
    return wrap(base)
7 | """ 8 | 9 | def __init__(self, *args, **kwargs): 10 | super(BlockingReset, self).__init__(*args, **kwargs) 11 | self.reward_n = None 12 | self.done_n = None 13 | self.info = None 14 | 15 | def _reset(self): 16 | observation_n = self.env.reset() 17 | self.reward_n = [0] * self.n 18 | self.done_n = [False] * self.n 19 | self.info = {'n': [{} for _ in range(self.n)]} 20 | 21 | while any(ob is None for ob in observation_n): 22 | action_n = [] 23 | for done in self.done_n: 24 | if done: 25 | # No popping of reward/done. Don't want to merge across episode boundaries. 26 | action_n.append([spaces.PeekReward]) 27 | else: 28 | action_n.append([]) 29 | new_observation_n, new_reward_n, new_done_n, new_info = self.env.step(action_n) 30 | rewarder.merge_n( 31 | observation_n, self.reward_n, self.done_n, self.info, 32 | new_observation_n, new_reward_n, new_done_n, new_info 33 | ) 34 | return observation_n 35 | 36 | def _step(self, action_n): 37 | observation_n, reward_n, done_n, info = self.env.step(action_n) 38 | if self.reward_n is not None: 39 | rewarder.merge_n( 40 | observation_n, reward_n, done_n, info, 41 | [None] * self.n, self.reward_n, self.done_n, self.info 42 | ) 43 | self.reward_n = self.done_n = self.info = None 44 | 45 | while any(ob is None for ob in observation_n): 46 | action_n = [] 47 | for done in done_n: 48 | if done: 49 | # No popping of reward/done. Don't want to merge across episode boundaries. 
class DualProxyServer(VNCProxyServer):
    """VNC proxy server that also spawns a reward proxy subprocess.

    The reward proxy is started when the client init message arrives and is
    terminated again in ``close()``, after which the log directory is uploaded.
    """

    def __init__(self, action_queue=None, error_buffer=None, enable_logging=True):
        """Initialise with no reward proxy running and delegate to the VNC proxy."""
        self._log_info('DualProxyServer inited')
        self.reward_proxy = None

        super(DualProxyServer, self).__init__(action_queue, error_buffer, enable_logging)

    def _log_info(self, msg, *args, **kwargs):
        """Log ``msg`` at INFO level with a ``[dual_proxy]`` prefix."""
        logger.info('[dual_proxy] ' + msg, *args, **kwargs)

    def recv_ClientInit(self, block):
        """Start the reward proxy, wait until it reports readiness, then continue the VNC handshake."""
        # start reward proxy.
        self._log_info('Starting reward proxy server')
        self.reward_proxy = pexpect.spawnu(self.factory.reward_proxy_bin,
                                           logfile=sys.stdout,
                                           timeout=None)

        # wait on reward proxy to be up.
        self._log_info('Waiting for reward proxy server')
        # Raw string: the brackets are regex escapes, not string escapes.
        self.reward_proxy.expect(r'\[RewardProxyServer\]')
        # Keep consuming the child's output in the background until it exits.
        self.reward_proxy_thread = threading.Thread(target=lambda: self.reward_proxy.expect(pexpect.EOF))
        self.reward_proxy_thread.start()

        self._log_info('Reward proxy server is up %s', self.reward_proxy.before)

        super(DualProxyServer, self).recv_ClientInit(block)

        self.logfile_dir = self.log_manager.logfile_dir

    def close(self):
        """Close the VNC connection, stop the reward proxy and upload the logs."""
        # end connections.
        super(DualProxyServer, self).close()

        # wait for rewarder to close.
        if self.reward_proxy:
            self.reward_proxy.terminate()

        # upload to s3.
        # probably hacky right now.
        logger.info('log manager = %s', self.log_manager)
        # logfile_dir is only assigned in recv_ClientInit; a connection closed
        # before the client init has nothing to upload.
        logfile_dir = getattr(self, 'logfile_dir', None)
        if self.log_manager and logfile_dir is not None:
            os.system('/app/universe/bin/upload_directory.sh demonstrator_%(recorder_id)s %(directory)s %(bucket)s' %
                      dict(recorder_id=self.factory.recorder_id, directory=logfile_dir,
                           bucket=self.factory.bucket)
                      )
10 | self.action_space = env_m[0].action_space 11 | self.observation_space = env_m[0].observation_space 12 | 13 | self.pool = pool.ThreadPool(min(len(env_m), 5)) 14 | 15 | self._n = sum(env.n for env in self.env_m) 16 | self.metadata = self.metadata.copy() 17 | self.metadata['render.modes'] = self.env_m[0].metadata['render.modes'] 18 | 19 | @property 20 | def n(self): 21 | return self._n 22 | 23 | def _close(self): 24 | if hasattr(self, 'pool'): 25 | self.pool.close() 26 | 27 | def _render(self, mode='human', close=False): 28 | return self.env_m[0]._render(mode=mode, close=close) 29 | 30 | def _reset(self): 31 | # Keep all env[0] action on the main thread, in case we ever 32 | # need to render. Otherwise we get segfaults from the 33 | # go-vncdriver. 34 | reset_m_async = self.pool.map_async(lambda env: env.reset(), self.env_m[1:]) 35 | reset = self.env_m[0].reset() 36 | reset_m = [reset] + reset_m_async.get() 37 | 38 | observation_n = [] 39 | for observation_m in reset_m: 40 | observation_n += observation_m 41 | return observation_n 42 | 43 | def _step(self, action_n): 44 | observation_n = [] 45 | reward_n = [] 46 | done_n = [] 47 | info_n = [] 48 | info = {} 49 | 50 | action_m = [] 51 | for env in self.env_m: 52 | action_m.append(action_n[len(action_m):len(action_m)+env.n]) 53 | 54 | # Keep all env[0] action on the main thread, in case we ever 55 | # need to render. Otherwise we get segfaults from the 56 | # go-vncdriver. 
class StarCraftEnv(vnc_env.VNCEnv):
    """VNC environment whose action space is limited to a fixed set of keys.

    The allowed keys are the three map-position keys, the spacebar and the
    four arrow keys; pointer actions are bounded by ``SCREEN_DIM``.
    """

    def __init__(self):
        super(StarCraftEnv, self).__init__()
        map_position_keys = ['f2', 'f3', 'f4']
        other_keys = ['spacebar', 'left', 'up', 'right', 'down']
        self.action_space = VNCActionSpace(
            keys=map_position_keys + other_keys,
            screen_shape=SCREEN_DIM
        )
        # The safe action space is the same object as the action space.
        self.safe_action_space = self.action_space
def test_steps_limit_restart():
    """A step-limited env reports done on the step that reaches the limit and restarts its counter."""
    env = gym.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    assert env._max_episode_seconds is None
    assert env._max_episode_steps == 2

    # Episode has started
    _, _, done, info = env.step([[]])
    assert done == [False]

    # Limit reached, now we get a done signal and the env resets itself
    _, _, done, info = env.step([[]])
    assert done == [True]
    assert env._elapsed_steps == 0


def test_steps_limit_restart_unused_when_not_wrapped():
    """Without the TimeLimit wrapper the registered step limit is not enforced."""
    env = gym.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env.reset()

    for i in range(10):
        _, _, done, info = env.step([[]])
        assert done == [False]


def test_seconds_limit_restart():
    """A seconds-limited env reports done only once the wall-clock limit has passed."""
    env = gym.make('test.SecondsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    assert env._max_episode_seconds == 0.1
    assert env._max_episode_steps is None

    # Episode has started
    _, _, done, info = env.step([[]])
    assert done == [False]

    # Not enough time has passed
    _, _, done, info = env.step([[]])
    assert done == [False]

    time.sleep(0.2)

    # Limit reached, now we get a done signal and the env resets itself
    _, _, done, info = env.step([[]])
    assert done == [True]


def test_default_time_limit():
    """An env registered without limits gets the default seconds limit and no step limit."""
    # We need an env without a default limit
    register(
        id='test.NoLimitDummyVNCEnv-v0',
        entry_point='universe.envs:DummyVNCEnv',
        tags={
            'vnc': True,
        },
    )

    env = gym.make('test.NoLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    assert env._max_episode_seconds == wrappers.time_limit.DEFAULT_MAX_EPISODE_SECONDS
    assert env._max_episode_steps is None
class JoystickActionSpace(gym.Space):
    """
    Programmable joystick - currently Windows-only => mapped to vJoy

    Each enabled control gets its own one-dimensional ``Box`` in [-1, 1],
    stored both as an attribute named after the control and in
    ``event_space_map`` (event class -> Box, in declaration order).
    """

    # (constructor argument / attribute name, event class name in joystick_event)
    _CONTROLS = (
        ('axis_x', 'JoystickAxisXEvent'),
        ('axis_y', 'JoystickAxisYEvent'),
        ('axis_z', 'JoystickAxisZEvent'),
        ('axis_rx', 'JoystickAxisRxEvent'),
        ('axis_ry', 'JoystickAxisRyEvent'),
        ('axis_rz', 'JoystickAxisRzEvent'),
        ('slider_0', 'JoystickSlider0Event'),
        ('slider_1', 'JoystickSlider1Event'),
    )

    def __init__(self, axis_x=False, axis_y=False, axis_z=False, axis_rx=False, axis_ry=False, axis_rz=False,
                 slider_0=False, slider_1=False):
        """Enable the controls whose flag is true; disabled controls get no attribute."""
        self.event_space_map = OrderedDict()

        requested = {
            'axis_x': axis_x, 'axis_y': axis_y, 'axis_z': axis_z,
            'axis_rx': axis_rx, 'axis_ry': axis_ry, 'axis_rz': axis_rz,
            'slider_0': slider_0, 'slider_1': slider_1,
        }
        for attr_name, event_name in self._CONTROLS:
            if not requested[attr_name]:
                continue
            space = box_axis()
            setattr(self, attr_name, space)
            # Event classes are looked up lazily, only for enabled controls.
            self.event_space_map[getattr(joystick_event, event_name)] = space
        # TODO: Add buttons (similar to a vnc_event.KeyEvent - but 1..32)
        # TODO: Add POV hats

    def contains(self, action):
        """Return whether ``action`` is a list whose axis events are all acceptable.

        Non-list actions are rejected. Events that are not
        ``JoystickAxisEvent`` instances are not checked.
        """
        if not isinstance(action, list):
            return False
        for a in action:
            if not isinstance(a, joystick_event.JoystickAxisEvent):
                continue
            # The map is keyed by event class, not by event instance.
            axis = self.event_space_map.get(type(a))
            if axis is None:
                # This control was not enabled for this space.
                return False
            # NOTE(review): the event object itself is handed to Box.contains;
            # presumably the event's numeric value should be passed instead -
            # confirm against joystick_event before relying on this check.
            if not axis.contains(a):
                return False
        return True

    def sample(self):
        """Return a one-element list holding a random event for a random enabled control.

        Raises:
            JoystickActionSpaceException: if the chosen event class is not an
                axis event.
        """
        event_types = list(self.event_space_map.keys())
        event_type = event_types[prng.np_random.randint(len(event_types))]
        if not issubclass(event_type, joystick_event.JoystickAxisEvent):
            raise JoystickActionSpaceException('Unexpected event type')
        return [event_type(self.event_space_map[event_type].sample()[0])]


class JoystickActionSpaceException(Exception):
    """Raised when the action space holds an event type it cannot sample."""
    pass


def box_axis():
    """Return the one-dimensional ``Box`` in [-1.0, 1.0] used for every control."""
    return Box(-1.0, 1.0, shape=(1,))
class Clockskew(protocol.ProcessProtocol):
    """Process protocol that collects a command's output and extracts a clock offset.

    ``self.deferred`` fires with the offset in seconds (float) when the
    process exits cleanly and its stdout contains ``offset <number> sec``;
    otherwise it errbacks with ``error.Error``.
    """

    def __init__(self, label, cmd):
        self.label = label
        self._cmd = cmd

        self.deferred = defer.Deferred()
        self.out = []  # stdout chunks (bytes)
        self.err = []  # stderr chunks (bytes)

    def outReceived(self, data):
        """Buffer a chunk of the child's stdout."""
        self.out.append(data)

    def errReceived(self, data):
        """Buffer a chunk of the child's stderr."""
        self.err.append(data)

    def processExited(self, reason):
        """Resolve ``self.deferred`` from the exit status and the buffered output."""
        if not isinstance(reason.value, twisted.internet.error.ProcessDone):
            err = b''.join(self.err)
            self.deferred.errback(error.Error('{} failed with status {}: stderr={!r}'.format(self._cmd, reason.value.exitCode, err)))
            return

        out = b''.join(self.out).decode('utf-8')
        match = re.search(r'offset ([\d.-]+) sec', out)
        if match is None:
            # Interpolate the output into the message; passing it as a second
            # exception argument would leave the '%s' unexpanded.
            self.deferred.errback(error.Error('Could not parse offset: {}'.format(out)))
            return
        self.deferred.callback(float(match.group(1)))


class ConnectionTimerException(Exception):
    pass
15 | It also returns the actions in the observation, so you can test that action wrappers are producing the right answers 16 | For example, to test that YourActionWrapper converts example_input_action to example_output_action: 17 | 18 | >>> dummy_env = gym.make('test.DummyVNCEnv-v0') 19 | >>> e = YourActionWrapper(dummy_env) 20 | >>> e = universe.wrappers.Unvectorize(e) 21 | >>> observation, reward, done, info = e.step(example_input_action) 22 | >>> assert observation['action'] == example_output_action 23 | 24 | """ 25 | metadata = { 26 | 'render.modes': ['human'], # we wrap with a Render which can render to rgb_array 27 | 'semantics.async': True, 28 | 'semantics.autoreset': True, 29 | 'video.frames_per_second' : 60, 30 | 'runtime.vectorized': True, 31 | } 32 | 33 | def __init__(self): 34 | self._started = False 35 | 36 | self.observation_space = spaces.VNCObservationSpace() 37 | self.action_space = spaces.VNCActionSpace() 38 | 39 | def configure(self, remotes=None, 40 | client_id=None, 41 | start_timeout=None, docker_image=None, 42 | ignore_clock_skew=False, disable_action_probes=False, 43 | vnc_driver=None, vnc_kwargs={}, 44 | replace_on_crash=False, allocate_sync=True, 45 | observer=False, 46 | _n=3, 47 | ): 48 | self.n = _n 49 | self._reward_buffers = [rewarder.RewardBuffer('dummy:{}'.format(i)) for i in range(self.n)] 50 | self._started = True 51 | 52 | def _reset(self): 53 | return [None] * self.n 54 | 55 | def _step(self, action_n): 56 | assert self.n == len(action_n), "Expected {} actions but received {}: {}".format(self.n, len(action_n), action_n) 57 | 58 | observation_n = [{ 59 | 'vision': np.zeros((1024, 768, 3), dtype=np.uint8), 60 | 'text': [], 61 | 'action': action_n[i] 62 | } for i in range(self.n)] 63 | 64 | reward_n = [] 65 | done_n = [] 66 | info_n = [] 67 | for reward_buffer in self._reward_buffers: 68 | reward, done, info = reward_buffer.pop() 69 | reward_n.append(reward) 70 | done_n.append(done) 71 | info_n.append(info) 72 | return 
class VNCEvent(object):
    """Base class for events in this module."""
    pass


def keycode(key):
    """Translate a key name to its keycode.

    Names present in ``constants.KEYMAP`` use the mapped value; any other
    single character maps to ``ord(key)``.

    Raises:
        error.Error: if ``key`` is neither in the keymap nor one character long.
    """
    try:
        return constants.KEYMAP[key]
    except KeyError:
        pass
    if len(key) == 1:
        return ord(key)
    raise error.Error('Not sure how to translate to keycode: {!r}'.format(key))


class KeyEvent(VNCEvent):
    """A key press (``down=True``) or release (``down=False``) of one keysym."""

    # keysym -> human readable name. Printable characters are added last and
    # therefore win over keymap names that share the same keysym.
    _keysym_to_name = {value: key for key, value in constants.KEYMAP.items()}
    _keysym_to_name.update((ord(c), c) for c in string.printable)

    @classmethod
    def build(cls, keys, down=None):
        """Build a key combination, such as:

        ctrl-t

        With ``down=None`` the result holds the presses in order followed by
        the releases in reverse order; a truthy ``down`` yields only the
        presses and a falsy non-None ``down`` only the releases.
        """
        codes = [keycode(key) for key in keys.split('-')]

        events = []
        if down is None or down:
            events.extend(cls(code, down=True) for code in codes)
        if down is None or not down:
            events.extend(cls(code, down=False) for code in reversed(codes))
        return events

    @classmethod
    def by_name(cls, key, down=None):
        """Build a single event from a key name.

        ``down`` is passed straight to the constructor, which coerces it with
        ``bool``; the default ``None`` therefore yields a release event.
        """
        return cls(keycode(key), down=down)

    def __init__(self, key, down=True):
        # TODO: validate key
        self.key = key
        self.down = bool(down)

    def compile(self):
        """Return the ``('KeyEvent', key, down)`` tuple for this event."""
        return 'KeyEvent', self.key, self.down

    def __repr__(self):
        direction = 'down' if self.down else 'up'
        name = self._keysym_to_name.get(self.key)
        if not name:
            name = '0x{:x}'.format(self.key)
        else:
            name = '{} (0x{:x})'.format(name, self.key)
        # The format string needs placeholders, otherwise name and direction
        # are silently discarded.
        return 'KeyEvent<key={} direction={}>'.format(name, direction)

    def __str__(self):
        return repr(self)

    def __hash__(self):
        return hash((self.key, self.down))

    def __eq__(self, other):
        return type(other) == type(self) and \
            other.key == self.key and \
            other.down == self.down

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        return not self.__eq__(other)

    @property
    def key_name(self):
        """Human readable name"""
        return self._keysym_to_name.get(self.key)


class PointerEvent(VNCEvent):
    """A pointer position together with a button mask."""

    def __init__(self, x, y, buttonmask=0):
        self.x = x
        self.y = y
        self.buttonmask = buttonmask

    def compile(self):
        """Return the ``('PointerEvent', x, y, buttonmask)`` tuple for this event."""
        return 'PointerEvent', self.x, self.y, self.buttonmask

    def __repr__(self):
        return 'PointerEvent<x={} y={} buttonmask={}>'.format(self.x, self.y, self.buttonmask)

    def __str__(self):
        return repr(self)
class Unvectorize(core.Wrapper):
    """
    Adapt a vectorized environment holding a batch of exactly one env so that
    it exposes plain (unvectorized) reset/step/seed values.
    """
    autovectorize = False
    metadata = {'runtime.vectorized': False}

    def _reset(self):
        """Reset the wrapped env and return its single observation."""
        batch = self.env.reset()
        assert(len(batch) == 1)
        return batch[0]

    def _step(self, action):
        """Step with ``action`` as a batch of one and return the first entry of each result."""
        observation_n, reward_n, done_n, info = self.env.step([action])
        return observation_n[0], reward_n[0], done_n[0], info['n'][0]

    def _seed(self, seed):
        """Seed the wrapped env with a one-element list and return the first result."""
        seeds = self.env.seed([seed])
        return seeds[0]
# Atari benchmark: every task runs a single trial capped at ten million timesteps.
# NOTE(review): the Pong task uses a "gym-core." id while the others use the
# "VNC" prefix - confirm this is intentional.
register_benchmark(
    id='Atari7VNC-v0',
    scorer=scoring.TotalReward(),
    name='AtariVNC',
    description='7 Atari games, with pixel observations (using universe)',
    tasks=[
        {"env_id": env_id, "trials": 1, "max_timesteps": 10000000}
        for env_id in (
            "VNCBeamRider-v3",
            "VNCBreakout-v3",
            "VNCEnduro-v3",
            "gym-core.Pong-v3",
            "VNCQbert-v3",
            "VNCSeaquest-v3",
            "VNCSpaceInvaders-v3",
        )
    ])
'max_timesteps': 5000000, 62 | 'reward_floor': 17.0, 63 | 'reward_ceiling': 400.0, 64 | }, 65 | {'env_id': 'flashgames.HeatRushUsa-v0', 66 | 'trials': 1, 67 | 'max_timesteps': 5000000, 68 | 'reward_floor': 150.0, 69 | 'reward_ceiling': 700.0, 70 | }, 71 | {'env_id': 'flashgames.FormulaRacer-v0', 72 | 'trials': 1, 73 | 'max_timesteps': 5000000, 74 | 'reward_floor': 0.27, 75 | 'reward_ceiling': 1.0, 76 | }, 77 | {'env_id': 'flashgames.DuskDrive-v0', 78 | 'trials': 1, 79 | 'max_timesteps': 5000000, 80 | 'reward_floor': 5000.0, 81 | 'reward_ceiling': 15000.0, 82 | }, 83 | {'env_id': 'flashgames.SpacePunkRacer-v0', 84 | 'trials': 1, 85 | 'max_timesteps': 5000000, 86 | 'reward_floor': 0.67, 87 | 'reward_ceiling': 2.25, 88 | }, 89 | {'env_id': 'flashgames.NeonRace2-v0', 90 | 'trials': 1, 91 | 'max_timesteps': 5000000, 92 | 'reward_floor': 0.0, 93 | 'reward_ceiling': 1200.0, 94 | } 95 | ]) 96 | -------------------------------------------------------------------------------- /universe/remotes/compose/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import unicode_literals 3 | 4 | import codecs 5 | import hashlib 6 | import json 7 | import json.decoder 8 | 9 | import six 10 | 11 | 12 | json_decoder = json.JSONDecoder() 13 | 14 | 15 | def get_output_stream(stream): 16 | if six.PY3: 17 | return stream 18 | return codecs.getwriter('utf-8')(stream) 19 | 20 | 21 | def stream_as_text(stream): 22 | """Given a stream of bytes or text, if any of the items in the stream 23 | are bytes convert them to text. 24 | 25 | This function can be removed once docker-py returns text streams instead 26 | of byte streams. 
27 | """ 28 | for data in stream: 29 | if not isinstance(data, six.text_type): 30 | data = data.decode('utf-8', 'replace') 31 | yield data 32 | 33 | 34 | def line_splitter(buffer, separator=u'\n'): 35 | index = buffer.find(six.text_type(separator)) 36 | if index == -1: 37 | return None 38 | return buffer[:index + 1], buffer[index + 1:] 39 | 40 | 41 | def split_buffer(stream, splitter=None, decoder=lambda a: a): 42 | """Given a generator which yields strings and a splitter function, 43 | joins all input, splits on the separator and yields each chunk. 44 | 45 | Unlike string.split(), each chunk includes the trailing 46 | separator, except for the last one if none was found on the end 47 | of the input. 48 | """ 49 | splitter = splitter or line_splitter 50 | buffered = six.text_type('') 51 | 52 | for data in stream_as_text(stream): 53 | buffered += data 54 | while True: 55 | buffer_split = splitter(buffered) 56 | if buffer_split is None: 57 | break 58 | 59 | item, buffered = buffer_split 60 | yield item 61 | 62 | if buffered: 63 | yield decoder(buffered) 64 | 65 | 66 | def json_splitter(buffer): 67 | """Attempt to parse a json object from a buffer. If there is at least one 68 | object, return it and the rest of the buffer, otherwise return None. 69 | """ 70 | try: 71 | obj, index = json_decoder.raw_decode(buffer) 72 | rest = buffer[json.decoder.WHITESPACE.match(buffer, index).end():] 73 | return obj, rest 74 | except ValueError: 75 | return None 76 | 77 | 78 | def json_stream(stream): 79 | """Given a stream of text, return a stream of json objects. 80 | This handles streams which are inconsistently buffered (some entries may 81 | be newline delimited, and others are not). 
def json_hash(obj):
    """Return the hex SHA-256 digest of a canonical JSON dump of ``obj``.

    The dump uses sorted keys and compact separators, so dictionaries
    that differ only in key order produce the same digest.
    """
    canonical = json.dumps(obj, sort_keys=True, separators=(',', ':'))
    return hashlib.sha256(canonical.encode('utf8')).hexdigest()
def merge_reward_n(accum_reward_n, reward_n):
    """Add every non-None entry of ``reward_n`` into ``accum_reward_n``.

    ``accum_reward_n`` is updated in place, index by index; slots whose
    incoming reward is ``None`` are left untouched. Nothing is returned.
    """
    for idx, reward in enumerate(reward_n):
        if reward is None:
            continue
        # Add rewards
        accum_reward_n[idx] += reward
def merge_n(
        accum_observation_n, accum_reward_n, accum_done_n, accum_info,
        observation_n, reward_n, done_n, info,
):
    """Fold one step's results into the accumulated results, in place.

    Observations, rewards and done flags are merged by their dedicated
    helpers. The info dicts are merged with ``merge_infos``: each entry
    of ``accum_info['n']`` is merged with its counterpart in
    ``info['n']``, and the top-level dicts are merged afterwards.
    """
    merge_observation_n(accum_observation_n, observation_n)
    merge_reward_n(accum_reward_n, reward_n)
    merge_done_n(accum_done_n, done_n)

    # Deep-merge the per-environment infos pairwise.
    per_env_infos = accum_info['n']
    for merged_info, incoming_info in zip(per_env_infos, info['n']):
        merge_infos(merged_info, incoming_info)

    # The top-level merge overwrites the 'n' key with info['n'], so put
    # the deep-merged list back afterwards.
    merge_infos(accum_info, info)
    accum_info['n'] = per_env_infos
def _set_done_to(self):
    """Advance ``done_to`` over the contiguous run of finished episodes.

    While the id directly after ``done_to`` is present in
    ``extra_done``, that id is removed from the set and becomes the new
    ``done_to``. Stops at the first id that has not finished yet.
    """
    candidate = self.done_to + 1
    while candidate in self.extra_done:
        self.extra_done.remove(candidate)
        self.done_to = candidate
        candidate += 1
@pytest.mark.parametrize('env_id', test_envs)
def test_smoke(env_id):
    """Check that environments start up without errors and that we can extract rewards and observations"""
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    env = gym.make(env_id)
    needs_configure = env.metadata.get('configure.required', False)
    # FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES is used to test universe-envs in CI
    force_latest = os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES')
    if needs_configure and force_latest:
        configure_with_latest_docker_runtime_tag(env)
    elif needs_configure:
        env.configure(remotes=1)

    env = wrappers.Unvectorize(env)

    env.reset()
    # Check a rollout
    _rollout(env, timestep_limit=60*30)
def configure_with_latest_docker_runtime_tag(env):
    """Configure ``env`` with one remote that runs the ``:latest`` image tag.

    The image name is looked up from the runtime spec named by
    ``env.spec.tags['runtime']``; its trailing tag is replaced with
    ``latest`` and the result is passed to ``env.configure`` as
    ``docker_image``.
    """
    original_image = registration.runtime_spec(env.spec.tags['runtime']).image
    # Replace only a trailing ":tag". The earlier pattern r':.*' matched from
    # the FIRST colon, which would truncate an image that carries a registry
    # "host:port/" prefix. An image without a trailing tag is left unchanged.
    latest_image = re.sub(r':[^/:]*$', ':latest', original_image)
    logger.info("Using latest image: {}".format(latest_image))
    env.configure(remotes=1, docker_image=latest_image)
def contains(self, action):
    """Return whether ``action`` is a list of events allowed by this space.

    Args:
        action: Candidate action; must be a ``list`` of events.

    Returns:
        ``False`` if ``action`` is not a list, if it holds a ``KeyEvent``
        that is not in the allowed key set, or if it holds a
        ``PointerEvent`` that is off-screen, uses a disallowed buttonmask,
        or arrives while pointer events are disabled
        (``screen_shape is None``). ``True`` otherwise. Elements that are
        neither ``KeyEvent`` nor ``PointerEvent`` are not validated here.
    """
    if not isinstance(action, list):
        return False

    for a in action:
        if isinstance(a, vnc_event.KeyEvent):
            if a not in self._key_set:
                return False
        elif isinstance(a, vnc_event.PointerEvent):
            if self.screen_shape is None:
                return False

            # Valid pixel coordinates run from 0 to dimension - 1, the same
            # range sample() draws from; a coordinate equal to the dimension
            # is already off-screen (the earlier `>` check let it through).
            if a.x < 0 or a.x >= self.screen_shape[0]:
                return False
            elif a.y < 0 or a.y >= self.screen_shape[1]:
                return False
            elif a.buttonmask not in self._buttonmask_set:
                return False

    return True
reward_buffer.RewardBuffer('buf') 35 | buf.reset('1') 36 | buf.push('1', 1, False, {'key': 'value1'}) 37 | buf.push_text('1', 'text1') 38 | 39 | buf.push('2', 2, False, {'key': 'value2'}) 40 | buf.push_text('2', 'text2') 41 | buf.push_text('2', 'text3') 42 | reward, done, info = buf.pop() 43 | assert reward == 1.0 # old 44 | assert done is True # old 45 | assert info['key'] == 'value1', 'Info: {}'.format(info) # old 46 | assert info['env_status.episode_id'] == '2', 'got: {}, expected: {}'.format(info['env_status.episode_id'], '1') 47 | assert info['env_status.complete.episode_id'] == '1' 48 | assert info['env_status.reset.episode_id'] == '1' 49 | assert info['env.text'] == ['text2', 'text3'] # new 50 | 51 | reward, done, info = buf.pop() 52 | assert reward == 2.0 # new 53 | assert done is False 54 | assert info['key'] == 'value2' 55 | assert info['env_status.episode_id'] == '2' 56 | assert 'env_status.reset.episode_id' not in info 57 | assert info['env.text'] == [] 58 | 59 | def test_double_reset(): 60 | buf = reward_buffer.RewardBuffer('buf') 61 | buf.reset('1') 62 | buf.set_env_info('running', 'test-v0', '1', fps=60) 63 | buf.push('1', 1, False, {'key': 'value1'}) 64 | buf.set_env_info('resetting', 'test-v0', '2', fps=60) 65 | buf.push('2', 20, False, {'key': 'value2'}) 66 | 67 | reward, done, info = buf.pop(peek=True) 68 | assert reward == 0 69 | assert done == False 70 | assert 'env_status.artificial.done' not in info 71 | assert info['env_status.episode_id'] == '1' 72 | assert info['env_status.env_state'] == 'running' 73 | assert info['env_status.peek.episode_id'] == '2' 74 | assert info['env_status.peek.env_state'] == 'resetting' 75 | 76 | buf.set_env_info('running', 'test-v0', '2', fps=60) 77 | 78 | reward, done, info = buf.pop(peek=True) 79 | assert reward == 0 80 | assert done == False 81 | assert 'env_status.artificial.done' not in info 82 | assert info['env_status.episode_id'] == '1' 83 | assert info['env_status.env_state'] == 'running' 84 | assert 
def stream_output(output, stream):
    """Render a stream of JSON events to ``stream`` and return them all.

    Events that carry neither ``progress`` nor ``progressDetail`` are
    printed and flushed immediately. Progress events are only drawn when
    ``stream`` is a terminal and the event has a truthy ``id``: each id
    is given its own row, and cursor-movement escape sequences are used
    to redraw that row in place.

    Returns:
        The list of every event decoded from ``output``.
    """
    is_terminal = hasattr(stream, 'isatty') and stream.isatty()
    stream = utils.get_output_stream(stream)
    all_events = []
    row_of = {}

    for event in utils.json_stream(output):
        all_events.append(event)

        if 'progress' not in event and 'progressDetail' not in event:
            print_output_event(event, stream, is_terminal)
            stream.flush()
            continue

        if not is_terminal:
            continue

        # if it's a progress event and we have a terminal, then display the progress bars
        image_id = event.get('id')
        if not image_id:
            continue

        if image_id in row_of:
            diff = len(row_of) - row_of[image_id]
        else:
            # First time this id is seen: open a fresh row for it.
            row_of[image_id] = len(row_of)
            stream.write("\n")
            diff = 0

        # move cursor up `diff` rows
        stream.write("%c[%dA" % (27, diff))

        print_output_event(event, stream, is_terminal)

        if 'id' in event:
            # move cursor back down
            stream.write("%c[%dB" % (27, diff))

        stream.flush()

    return all_events
def get_digest_from_pull(events):
    """Return the digest announced by a sequence of pull events.

    The first event whose ``status`` is non-empty and contains
    ``'Digest'`` is used: everything after the first ``':'`` in that
    status, stripped of surrounding whitespace, is returned. Returns
    ``None`` when no such event exists.
    """
    for event in events:
        status = event.get('status')
        if status and 'Digest' in status:
            _label, digest = status.split(':', 1)
            return digest.strip()
    return None
def update_rectangle(self, x, y, width, height, data):
    """Blit a rectangle of pixel data into the window texture.

    ``data`` is serialised with ``tobytes()``, wrapped in a pyglet RGB
    ``ImageData`` and copied into ``self.texture``; the screen is then
    marked as updated so the next ``flip`` redraws it.
    """
    raw = data.tobytes()
    pyprofile.incr('vncdriver.pyglet_screen.blit')
    pyprofile.incr('vncdriver.pyglet_screen.blit.bytes', len(raw), unit=pyprofile.BYTES)
    import pyglet
    # NOTE(review): the negative pitch presumably tells pyglet that rows
    # are stored top-to-bottom -- confirm against the pyglet docs.
    image = pyglet.image.ImageData(width, height, 'RGB', raw, pitch=width * -3)
    # NOTE(review): the y offset is measured from the opposite edge,
    # presumably because pyglet's origin is bottom-left -- confirm.
    self.texture.blit_into(image, x, self._height-height-y, 0)
    self._is_updated = True
def _start_monitor(self):
    """Create the gym monitor for the first environment of the vector.

    Uses ``gym.wrappers.Monitor`` when the installed gym provides
    ``gym.wrappers``; otherwise falls back to the older
    ``gym.monitoring.MonitorManager`` and logs a deprecation warning.
    """
    logger.info("Starting Monitor. Writing monitor logs to {}".format(self.directory))

    # Circular dependencies :(
    from universe import wrappers
    # We need to maintain pointers to these to avoid them being
    # GC'd. They have a weak reference to us to avoid cycles.
    # TODO if we want to monitor more than one instance in a vectorized
    # env we'll need to actually fix WeakUnvectorize
    self._unvectorized_envs = [wrappers.WeakUnvectorize(self.env, i) for i in range(1)]

    # For now we only monitor the first env
    if hasattr(gym, 'wrappers'):
        self._monitor = gym.wrappers.Monitor(self._unvectorized_envs[0],
                                             directory=self.directory,
                                             video_callable=self.video_callable,
                                             force=self.force,
                                             resume=self.resume,
                                             write_upon_reset=self.write_upon_reset,
                                             uid=self.uid,
                                             mode=self.mode
        )
    else:
        # logger.warn is a deprecated alias; logger.warning is the supported name.
        logger.warning("DEPRECATION WARNING: You are using an older version of gym that has a deprecated Monitor, please update to gym:v0.8.0. This change was made 2017/02/01 and is included in universe version 0.21.3")
        from gym import monitoring
        self._monitor = monitoring.MonitorManager(self._unvectorized_envs[0])
        self._monitor.start(
            self.directory,
            self.video_callable,
            self.force,
            self.resume,
            self.write_upon_reset,
            self.uid,
            self.mode
        )
def Monitor(env, directory, video_callable=None, force=False, resume=False,
            write_upon_reset=False, uid=None, mode=None):
    """Wrap ``env`` in a ``TimeLimit`` and then in a ``_UniverseMonitor``.

    All monitor options are forwarded unchanged to ``_UniverseMonitor``.
    """
    time_limited_env = TimeLimit(env)
    return _UniverseMonitor(
        time_limited_env,
        directory,
        video_callable=video_callable,
        force=force,
        resume=resume,
        write_upon_reset=write_upon_reset,
        uid=uid,
        mode=mode,
    )
29 | 30 | Vectorized API 31 | ============== 32 | 33 | The vectorized Gym API allows a single client-side environment to 34 | control a vector of remotes. The main difference with the 35 | non-vectorized Gym API is that individual environments will 36 | automatically reset upon reaching the end of an episode. (An episode 37 | is defined as ending when an agent has concretely succeeded or failed 38 | at the task, such as after clearing a level of a game, or losing the 39 | game. Some environments without clearly delineated success and 40 | failure conditions may not have episodes.) 41 | 42 | There are two API methods, ``reset`` and ``step``. The semantics are: 43 | 44 | - ``reset`` takes no arguments and returns a vector of observations: 45 | 46 | .. code:: python 47 | 48 | observation_n = env.reset() 49 | 50 | - ``step`` consumes a vector of actions, and returns a vector of 51 | observations, vector of rewards, vector of done booleans, and an 52 | info dictionary. The info dictionary has an ``n`` key, which 53 | contains a vector of infos specific to each env: 54 | 55 | .. code:: python 56 | 57 | observation_n, reward_n, done_n, info = env.step(action_n) 58 | # len(info['n']) == len(observation_n) 59 | 60 | Some important notes: 61 | 62 | - At any given moment, some of the environments may be 63 | resetting. Resetting environments will have a ``None`` value for 64 | their observation. For example, an ``observation_n`` of ``[None, 65 | {'vision': ...}, {'vision': ...}]`` indicates that the environment 66 | at index 0 is resetting. 67 | - When an index returns ``done=True``, the corresponding environment 68 | will automatically start resetting. 69 | - The user must call ``reset`` once before calling ``step``; undefined 70 | behavior will result if ``reset`` is not called. 
Further ``reset`` 71 | calls are allowed, but generally are used only if the environment has 72 | been idle for a while (such as with periodic evaluation), or when it 73 | is important to start at the beginning. 74 | 75 | Versioning 76 | ========== 77 | 78 | The remote is versioned and has fixed semantics, assuming sufficient 79 | compute resources are applied (i.e. if you don't have enough CPU, your 80 | flash environments will likely behave differently). The client's exact 81 | semantics will depend on the version of universe you have installed, 82 | and you should track the version of that together with the rest of 83 | your agent code. 84 | 85 | -------------------------------------------------------------------------------- /example/recorders/botaction_recorder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | This is a small server that accepts connections on a websocket port and writes it to a file. 4 | 5 | The purpose is to allow a universe-env with a built-in bot to record the actions it's taking 6 | as a demonstration. So the demonstration includes a botactions.jsonl file that gets used instead 7 | of the vnc client log. (The vnc client log is still recorded and needed to fully parse the VNC 8 | protocol.) 9 | 10 | It's much simpler than reward_recorder.py, because it doesn't have to also talk to the agent. 11 | It just takes json messages over a websocket and appends them separated by newlines to the log file.
12 | 13 | The ws port is 15896 unless overridden with --listen-address 14 | The log file is /tmp/demo/botactions.jsonl unless overridden with --botaction-logfile 15 | """ 16 | import argparse 17 | import logging 18 | import sys 19 | import json 20 | from autobahn.twisted import websocket 21 | from universe.twisty import reactor 22 | logger = logging.getLogger() 23 | 24 | class BotactionRecordingServer(websocket.WebSocketServerProtocol, object): 25 | 26 | _next_id = 1 27 | @classmethod 28 | def next_id(cls): 29 | id = cls._next_id 30 | cls._next_id += 1 31 | return id 32 | 33 | logfile_path='/tmp/demo/botactions.jsonl' 34 | 35 | def __init__(self): 36 | super(BotactionRecordingServer, self).__init__() 37 | self.id = self.next_id() 38 | self._closed = False 39 | self.file = None 40 | 41 | logger.info("[BotactionRecordingServer] [%d] Wrote version number", self.id) 42 | 43 | def _emit(self, rec): 44 | if self.file: 45 | self.file.write(json.dumps(rec) + '\n'); 46 | self.file.flush() 47 | 48 | def onConnect(self, request): 49 | logger.info('[BotactionRecordingServer] [%d] Client connecting: %s.
Writing to %s', self.id, request.peer, self.logfile_path) 50 | self.file = open(self.logfile_path, 'w', encoding='utf-8') 51 | self._emit({ 52 | 'version': 1, 53 | 'session_id': self.id, 54 | '_debug_version': '0.0.1', # Give this an internal version for debugging corrupt reward.demo files # TODO, pull this from setup.py or the host docker image 55 | }) 56 | 57 | def onOpen(self): 58 | logger.info("[BotactionRecordingServer] [%d] Websocket connection established", self.id) 59 | 60 | def onClose(self, wasClean, code, reason): 61 | logger.info('[BotactionRecordingServer] [%d] Client connection closed: %s', self.id, reason) 62 | if self.file: 63 | self.file.close() 64 | self.file = None 65 | 66 | self._closed = True 67 | 68 | def onMessage(self, msg, binary): 69 | logger.debug('[BotactionRecordingServer] [%d] Received message from client: %s', self.id, msg) 70 | 71 | self._emit(json.loads(msg.decode('utf-8'))); 72 | 73 | def main(): 74 | parser = argparse.ArgumentParser(description=None) 75 | parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.') 76 | parser.add_argument('-l', '--listen-address', default='127.0.0.1:15896', help='Address to listen on') 77 | parser.add_argument('-o', '--botaction-logfile', default='/tmp/demo/botactions.jsonl', help='Filename for timestamped log of bot actions.') 78 | args = parser.parse_args() 79 | 80 | BotactionRecordingServer.logfile_path = args.botaction_logfile 81 | 82 | if args.verbosity == 0: 83 | logger.setLevel(logging.INFO) 84 | elif args.verbosity >= 1: 85 | logger.setLevel(logging.DEBUG) 86 | 87 | factory = websocket.WebSocketServerFactory() 88 | factory.protocol = BotactionRecordingServer 89 | 90 | host, port = args.listen_address.split(':') 91 | port = int(port) 92 | logger.info('Listening on %s:%s', host, port) 93 | reactor.listenTCP(port, factory) 94 | reactor.run() 95 | return 0 96 | 97 | if __name__ == '__main__': 98 | sys.exit(main()) 99 | 
class VNCSession(object):
    """Drives one VNC client connection per remote.

    The constructor connects to every remote and blocks (via
    ``utils.blockingCallFromThread``) until the combined connection deferred
    fires. Afterwards ``step``/``flip``/``peek`` operate on all clients at
    once, returning one entry per remote.
    """

    def __init__(self, remotes, error_buffer):
        """
        Args:
            remotes: sequence of address strings; each is appended to
                ``'tcp:'`` to build a Twisted client endpoint description.
            error_buffer: passed to every client factory; its ``record``
                method receives exceptions raised while sending actions.
        """
        self.remotes = remotes
        self.error_buffer = error_buffer
        self._pyglet_screen = None
        self.connect()

    def connect(self):
        utils.blockingCallFromThread(self._connect)

    def _connect_remote(self, i, remote):
        """Start connecting to a single remote and return its deferred.

        This lives in its own method so that the callbacks below close over
        *this* remote's ``factory`` and ``d``. Defined inline in the loop
        they would late-bind to the last iteration's objects (and ``d`` to
        the DeferredList created afterwards).
        """
        d = defer.Deferred()

        factory = vnc_client.client_factory(d, self.error_buffer)
        factory.rewarder_session = self
        factory.label = 'vnc:{}:{}'.format(i, remote)
        endpoint = endpoints.clientFromString(reactor, 'tcp:'+remote)

        def success(protocol):
            logger.info('[%s] VNC connection established', factory.label)

        def fail(reason):
            reason = error.Error('[{}] Connection failed: {}'.format(factory.label, reason.value))
            try:
                d.errback(utils.format_error(reason))
            except defer.AlreadyCalledError:
                # The deferred already fired; nothing left to report on it.
                pass
        endpoint.connect(factory).addCallback(success).addErrback(fail)
        return d

    def _connect(self):
        """Connect to every remote; the returned deferred fires when all are done."""
        deferreds = [self._connect_remote(i, remote)
                     for i, remote in enumerate(self.remotes)]

        all_connected = defer.DeferredList(deferreds, fireOnOneErrback=True)

        def store_clients(results):
            # Store the _clients list when connected. DeferredList yields
            # (success, value) pairs; the value is kept as the client.
            self._clients = [client for _, client in results]
        all_connected.addCallback(store_clients)
        return all_connected

    def flip(self):
        """Flip every client's screen buffer.

        Returns:
            (observation_n, info_n): one observation and one info dict
            (``{'vnc.updates.n': <number of framebuffer updates>}``) per client.
        """
        observation_n = []
        info_n = []
        for i, client in enumerate(self._clients):
            observation, info = client.numpy_screen.flip()
            updates = info['vnc_session.framebuffer_updates']

            # Keep the pyglet screen fed, but don't flip it until the user calls render
            if i == 0 and self._pyglet_screen:
                for update in updates:
                    self._pyglet_screen.apply(update)

            observation_n.append(observation)
            info_n.append({'vnc.updates.n': len(updates)})

        return observation_n, info_n

    def peek(self):
        """Return each client's ``numpy_screen.peek()`` result, without flipping."""
        return [client.numpy_screen.peek() for client in self._clients]

    def step(self, action):
        """Schedule ``action`` to be sent on the reactor thread, then flip."""
        reactor.callFromThread(self._step, action)
        return self.flip()

    def _step(self, action):
        """Send each client its list of events.

        Each event is a tuple tagged by its first element:
        ``('KeyEvent', key, down)`` or ``('PointerEvent', x, y, buttonmask)``.
        Any exception is recorded on ``error_buffer`` rather than raised.
        """
        try:
            for client_events, client in zip(action, self._clients):
                for event in client_events:
                    if event[0] == 'KeyEvent':
                        key, down = event[1:]
                        client.send_KeyEvent(key, down)
                    elif event[0] == 'PointerEvent':
                        x, y, buttonmask = event[1:]
                        client.send_PointerEvent(x, y, buttonmask)
                    else:
                        # Report the offending tag (the original formatted the
                        # builtin ``type`` by mistake).
                        raise error.Error('Bad event type: {}'.format(event[0]))
        except Exception as e:
            self.error_buffer.record(e)

    def render(self):
        """Show the first client's screen, creating the pyglet window on first use."""
        if not self._pyglet_screen:
            start = self.peek()[0]
            self._pyglet_screen = screen.PygletScreen(start)
        self._pyglet_screen.flip()

    def close(self):
        utils.blockingCallFromThread(self._close)

    def _close(self):
        """Close all clients; safe to call when connecting never completed."""
        if getattr(self, '_clients', None) is not None:
            for client in self._clients:
                client.close()
            self._clients = None
class GymCoreSync(vectorized.Wrapper):
    """Synchronous wrapper around the core envs, intended to match their semantics.

    Observations are, by default, pixels from the VNC session, although
    receiving the normal Gym observations over the rewarder protocol is
    supported too.

    Provided primarily for testing and debugging.
    """

    def __init__(self, env):
        super(GymCoreSync, self).__init__(env)
        # Results gathered during _reset, merged into the next _step.
        self.reward_n = None
        self.done_n = None
        self.info = None

        # Metadata has already been cloned
        self.metadata['semantics.async'] = False

    def _reset(self):
        """Reset, then step with empty actions until observations catch up."""
        observation_n = self.env.reset()
        noop_n = [[] for _ in range(self.n)]
        new_observation_n, self.reward_n, self.done_n, self.info = self.env.step(noop_n)
        rewarder.merge_observation_n(observation_n, new_observation_n)

        # Fast forward until the observation is caught up with the rewarder
        self._flip_past(observation_n, self.reward_n, self.done_n, self.info)

        assert all(r == 0 for r in self.reward_n), "Unexpectedly received rewards during reset phase: {}".format(self.reward_n)
        return observation_n

    def _step(self, action_n):
        """Send the actions plus a commit keypress and wait for exactly one reward each."""
        # Add C keypress in order to "commit" the action, as
        # interpreted by the remote.
        committed_n = []
        for action in action_n:
            committed_n.append(action + [
                spaces.KeyEvent.by_name('c', down=True),
                spaces.KeyEvent.by_name('c', down=False),
            ])

        observation_n, reward_n, done_n, info = self.env.step(committed_n)
        if self.reward_n is not None:
            # Fold in whatever was collected during the preceding reset.
            rewarder.merge_n(
                observation_n, reward_n, done_n, info,
                [None] * self.n, self.reward_n, self.done_n, self.info,
            )
            self.reward_n = self.done_n = self.info = None

        while True:
            pending = sum(1 for info_i in info['n'] if info_i['stats.reward.count'] == 0)
            if pending == 0:
                break
            logger.debug('[GymCoreSync] Still waiting on %d envs to receive their post-commit reward', pending)

            new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for _ in range(self.n)])
            rewarder.merge_n(
                observation_n, reward_n, done_n, info,
                new_observation_n, new_reward_n, new_done_n, new_info
            )

        assert all(info_i['stats.reward.count'] == 1 for info_i in info['n']), "Expected all stats.reward.counts to be 1: {}".format(info)

        # Fast forward until the observation is caught up with the rewarder
        self._flip_past(observation_n, reward_n, done_n, info)
        return observation_n, reward_n, done_n, info

    def _flip_past(self, observation_n, reward_n, done_n, info):
        """Step with empty actions until every observation reaches its target remote time.

        Each new step result is merged into the passed-in containers via
        ``rewarder.merge_n``.
        """
        # Wait until all observations are past the corresponding reset times
        remote_target_time = [info_i['reward_buffer.remote_time'] for info_i in info['n']]
        while True:
            new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for _ in range(self.n)])

            # info_i.get['diagnostics.image_remote_time'] may not exist, for example when an env
            # is resetting. target is a timestamp, thus > 0, so these will count as "need to catch up"
            deltas = []
            for target, info_i in zip(remote_target_time, new_info['n']):
                deltas.append(target - info_i.get('diagnostics.image_remote_time', 0))
            behind = sum(1 for d in deltas if d > 0)

            rewarder.merge_n(
                observation_n, reward_n, done_n, info,
                new_observation_n, new_reward_n, new_done_n, new_info
            )

            if behind == 0:
                return
            logger.debug('[GymCoreSync] Still waiting on %d envs to catch up to their targets: %s', behind, deltas)
logger = logging.getLogger()


def parse_episode_id(episode_id):
    """Convert a string episode id to an int; ``None`` maps to -1."""
    if episode_id is None:
        return -1
    return int(episode_id)


def generate_episode_id(parsed):
    """Inverse of ``parse_episode_id``: -1 maps to ``None``, anything else to ``str``."""
    if parsed == -1:
        return None
    return str(parsed)


def compare_ids(a, b):
    """Three-way compare two episode ids numerically.

    Returns 0 when equal, -1 when ``a`` sorts before ``b`` and 1 otherwise.
    ``None`` sorts before every real id.
    """
    if a == b:
        return 0
    elif a is None:
        return -1
    elif b is None:
        return 1
    elif parse_episode_id(a) < parse_episode_id(b):
        return -1
    else:
        return 1


class EnvStatus(object):
    """Holds env_state / env_id / episode_id / fps behind a condition variable.

    All reads and writes take ``self.cv``; ``wait_for_env_state_change``
    blocks on it until the state differs from a given one.
    """

    def __init__(self, label=None, primary=True):
        """
        Args:
            label: prefix used in log messages (defaults to 'EnvStatus').
            primary: when true this object generates episode ids itself;
                otherwise ids must be supplied to ``set_env_info``.
        """
        self.cv = threading.Condition()
        self._env_id = None
        self._env_state = None
        self._episode_id = '0'
        self._fps = None
        self.label = label or 'EnvStatus'
        self.primary = primary

    def env_info(self):
        """Return a snapshot dict with env_state, env_id, episode_id and fps."""
        with self.cv:
            return {
                'env_state': self._env_state,
                'env_id': self._env_id,
                'episode_id': self._episode_id,
                'fps': self._fps,
            }

    def set_env_info(self, env_state=None, env_id=None, episode_id=None, bump_past=None, fps=None):
        """Atomically set the environment state tracking variables.

        Arguments left as ``None`` keep their current value (``env_state``,
        ``env_id``, ``fps``).

        Episode id handling:
          * primary: the id is advanced past ``bump_past`` when given, else
            incremented by one when ``env_state == 'resetting'``, else kept.
            An fps (current or supplied) must be set.
          * non-primary: ``episode_id=False`` keeps the current id; any other
            value replaces it and must not be ``None``.

        Returns:
            The ``env_info()`` snapshot after the update.
        """
        with self.cv:
            if env_id is None:
                env_id = self._env_id
            if env_state is None:
                env_state = self._env_state
            if fps is None:
                fps = self._fps
            # notify_all is the non-deprecated spelling of notifyAll. Waiters
            # cannot actually resume until this `with` block releases the
            # lock, so they always observe the fully updated state.
            self.cv.notify_all()

            old_episode_id = self._episode_id
            if self.primary:
                current_id = parse_episode_id(self._episode_id)
                # Bump when changing from resetting -> running
                if bump_past is not None:
                    bump_past_id = parse_episode_id(bump_past)
                    current_id = max(bump_past_id+1, current_id+1)
                elif env_state == 'resetting':
                    current_id += 1
                self._episode_id = generate_episode_id(current_id)
                assert self._fps or fps
            elif episode_id is False:
                # keep the same episode_id: this is just us proactive
                # setting the state to resetting after a done=True
                pass
            else:
                assert episode_id is not None, "No episode_id provided. This likely indicates a misbehaving server, which did not send an episode_id"
                self._episode_id = episode_id
            self._fps = fps
            logger.info('[%s] Changing env_state: %s (env_id=%s) -> %s (env_id=%s) (episode_id: %s->%s, fps=%s)', self.label, self._env_state, self._env_id, env_state, env_id, old_episode_id, self._episode_id, self._fps)
            self._env_state = env_state
            if env_id is not None:
                self._env_id = env_id

            return self.env_info()

    @property
    def episode_id(self):
        with self.cv:
            return self._episode_id

    @property
    def env_state(self):
        with self.cv:
            return self._env_state

    @env_state.setter
    def env_state(self, value):
        # TODO: Validate env_state
        self.set_env_info(value)

    @property
    def env_id(self):
        with self.cv:
            return self._env_id

    @env_id.setter
    def env_id(self, value):
        self.set_env_info(None, env_id=value)

    @property
    def fps(self):
        with self.cv:
            return self._fps

    def wait_for_env_state_change(self, start_state):
        """Block until env_state differs from ``start_state``; return ``env_info()``.

        Re-checks at least every 10 seconds as well as on every notification.
        """
        with self.cv:
            while True:
                if self._env_state != start_state:
                    return self.env_info()
                self.cv.wait(timeout=10)
class UnregisteredRuntime(error.Unregistered):
    """Raised when the user requests a runtime from the registry that does
    not actually exist.
    """
    pass


class DockerRuntime(object):
    """Lightweight struct for our DockerImage configuration"""
    def __init__(self, id=None, image=None, command=None, host_config=None, default_params=None, server_registry_file=None):
        """
        Args:
            id: The short identifier for this runtime
            image: The full docker image name including a tag
            command: A list of commands to be passed to docker
            host_config: A dict that will be fed to docker.Client().create_host_config
            default_params: The default parameter values for this environment
            server_registry_file: A file containing a JSON dump of the server registry. The format will be runtime-specific.
        """
        # NOTE: the default used to be the builtin ``id`` function (a typo
        # for None); callers are expected to pass an explicit id.
        self.id = id
        self.image = image
        self.command = command or []
        self.host_config = host_config or {}
        self.default_params = default_params or {}

        self._server_registry = None
        self._server_registry_file = server_registry_file

    @property
    def server_registry(self):
        """The parsed server registry, loaded from disk on first access and cached."""
        if self._server_registry is None:
            with open(self._server_registry_file) as f:
                self._server_registry = json.load(f)
        return self._server_registry

    @property
    def _cli_flags(self):
        """Translate ``host_config`` into a flat list of ``docker run`` flags.

        String values become ``--key value``; ``True`` becomes a bare
        ``--key``; ``False`` emits nothing; any other value is iterated and
        produces one ``--key item`` pair per item.
        """
        # Not everything maps in a straightforward way, e.g. cap_add => '--cap-add' but ipc_mode => '--ipc'
        api_to_cli = {
            'ipc_mode': 'ipc'
        }

        cli_flags = []
        for api_key, api_value in self.host_config.items():
            cli_key = api_to_cli.get(api_key, api_key)
            cli_flag = '--{}'.format(cli_key.replace('_', '-'))

            if isinstance(api_value, (six.string_types, bool)):
                cli_values = [api_value]
            else:
                cli_values = api_value

            for cli_value in cli_values:
                if isinstance(cli_value, bool):
                    # boolean flag, like --privileged. Only emit it when the
                    # option is enabled; a False value must not turn it on.
                    if cli_value:
                        cli_flags.append(cli_flag)
                else:
                    cli_flags += [cli_flag, cli_value]

        return cli_flags

    def cli_command(self, vnc_port, rewarder_port, extra_flags=None):
        """Build the ``docker run`` argv that starts this runtime.

        Args:
            vnc_port: host port mapped to container port 5900.
            rewarder_port: host port mapped to container port 15900.
            extra_flags: optional list of additional flags, inserted before
                the flags derived from ``host_config``.
        """
        extra_flags = [] if extra_flags is None else list(extra_flags)
        return ['docker', 'run',
                '-p', '{}:5900'.format(vnc_port),
                '-p', '{}:15900'.format(rewarder_port)] + \
            extra_flags + \
            self._cli_flags + \
            [self.image] + self.command


class WindowsRuntime(object):
    # TODO: Spawn windows runtimes (right now managed manually)
    def __init__(self, id=None, default_params=None):
        """
        Args:
            id: The short identifier for this runtime
            default_params: The default parameter values for this environment
        """
        self.id = id
        # Normalised to a dict, matching DockerRuntime.
        self.default_params = default_params or {}


class Registry(object):
    """Ordered mapping from runtime id to its runtime configuration object."""

    def __init__(self):
        self.runtimes = collections.OrderedDict()

    def register_runtime(self, id, kind, **kwargs):
        """Register a runtime of ``kind`` ('docker' or 'windows') under ``id``.

        ``kwargs`` are forwarded to the runtime class constructor.
        Raises ``gym.error.Error`` for an unknown kind.
        """
        if kind == "docker":
            self.runtimes[id] = DockerRuntime(id, **kwargs)
        elif kind == "windows":
            self.runtimes[id] = WindowsRuntime(id, **kwargs)
        else:
            raise error.Error("No runtime of kind {} . \n Valid options are ['docker', 'windows']".format(kind))

    def runtime_spec(self, id):
        """
        id is a string describing the runtime, e.g 'flashgames

        Returns the runtime object registered under that id.
        Raises UnregisteredRuntime when nothing is registered under ``id``.
        """
        try:
            return self.runtimes[id]
        except KeyError:
            raise UnregisteredRuntime('No registered runtime with name: {}'.format(id))


registry = Registry()
register_runtime = registry.register_runtime
runtime_spec = registry.runtime_spec
# We log these with logger, which in py2 chokes on unicode
def fmt_plusminus(mean, dev):
    """Join two already-formatted strings with a plus/minus sign."""
    if six.PY3:
        return mean + '±' + dev
    else:
        # Logging unicode in py2 is asking for trouble
        return mean + '+-' + dev


def compute_timestamps_pair_max(time_m_2, flat=True):
    """Summarise the second column (the max lag) of one or more runs of pairs.

    Args:
        time_m_2: with ``flat=True`` a single run, i.e. a list of pairs;
            with ``flat=False`` a list of such runs.
        flat: whether ``time_m_2`` is a single run.

    Returns:
        (timestamp_m, data_m): per run, the formatted string and data dict
        from ``compute_timestamps_sigma`` (``None`` and ``{}`` for an empty
        run). ``(None, None)`` when there are no runs at all.
    """
    if flat:
        # Ignore empty inputs, which happens when environments are resetting.
        time_m_2 = [[x for x in time_m_2 if len(x)]]

    if len(time_m_2) == 0:
        return None, None

    # We concatenate the (min, max) lags from a variety of runs. Those
    # runs may have different lengths.
    time_m_2 = [np.array(m) for m in time_m_2]

    timestamp_m = []
    data_m = []
    for m in time_m_2:
        if len(m) > 0:
            timestamp, data = compute_timestamps_sigma(m[:, 1])
            timestamp_m.append(timestamp)
            data_m.append(data)
        else:
            timestamp_m.append(None)
            data_m.append({})
    return timestamp_m, data_m


def display_timestamps_pair_compact(time_m_2):
    """Takes a list of the following form: [(a1, b1), (a2, b2), ...] and
    returns a string a_mean-b_mean, flooring out at 0.
    """
    if len(time_m_2) == 0:
        return '(empty)'

    time_m_2 = np.array(time_m_2)

    low = time_m_2[:, 0].mean()
    high = time_m_2[:, 1].mean()

    low = max(low, 0)

    # Not sure if this'll always be true, and not worth crashing over
    if high < 0:
        # logger.warning: ``warn`` is a deprecated alias.
        logger.warning('Harmless warning: upper-bound on clock skew is negative: (%s, %s). Please let Greg know about this.', low, high)

    return '{}-{}'.format(display_timestamp(low), display_timestamp(high))


def display_timestamps_pair(time_m_2):
    """Takes a list of the following form: [(a1, b1), (a2, b2), ...] and
    returns a string (a_mean+/-a_error, b_mean+/-b_error).
    """
    if len(time_m_2) == 0:
        return '(empty)'

    time_m_2 = np.array(time_m_2)
    return '({}, {})'.format(
        display_timestamps(time_m_2[:, 0]),
        display_timestamps(time_m_2[:, 1]),
    )


def compute_timestamps_sigma_n(time_m):
    """Apply ``compute_timestamps`` to each element; return parallel lists."""
    timestamp_m = []
    data_m = []

    for t in time_m:
        timestamp, data = compute_timestamps(t)
        timestamp_m.append(timestamp)
        data_m.append(data)

    return timestamp_m, data_m


def compute_timestamps_sigma(time_m):
    """Format mean±standard-error using one shared unit chosen from the mean.

    Returns ``(None, {})`` for empty input, else ``(text, {'mean': mean})``.
    """
    if len(time_m) == 0:
        return None, {}

    mean = np.mean(time_m)
    std = standard_error(time_m)
    scale, units = pick_time_units(mean)
    return fmt_plusminus('{:.2f}{}'.format(mean * scale, units), '{:.2f}{}'.format(std * scale, units)), {'mean': mean}


def display_timestamps(time_m):
    """Return the mean±standard-error text for ``time_m``, or '(empty)'."""
    res, _ = compute_timestamps(time_m)
    if res is None:
        return '(empty)'
    else:
        return res


def compute_timestamps(time_m):
    """Format mean±standard-error, each part choosing its own unit.

    Returns ``(None, {})`` for empty input, else ``(text, {'mean': mean})``.
    """
    if len(time_m) == 0:
        return None, {}

    mean = np.mean(time_m)
    std = standard_error(time_m)
    return fmt_plusminus(display_timestamp(mean), display_timestamp(std)), {'mean': mean}


def display_timestamps_n(time_m):
    """Concatenate all the n's timesteps together, then display_timestamps on it."""
    # np.concatenate raises ValueError on an empty sequence; report it the
    # same way every other empty input is reported.
    if len(time_m) == 0:
        return '(empty)'
    return display_timestamps(np.concatenate(time_m))


def standard_error(ary, axis=0):
    """Return std / sqrt(n - 1), or just the std when there is at most one sample."""
    if len(ary) > 1:
        return np.std(ary, axis=axis) / np.sqrt(len(ary) - 1)
    else:
        return np.std(ary, axis=axis)


def display_timestamp(time):
    """Format a scalar number of seconds as 'X.XXms' (|time| < 1) or 'X.XXs'."""
    assert not isinstance(time, np.ndarray), 'Invalid scalar: {}'.format(time)
    scale, units = pick_time_units(time)
    return '{:.2f}{}'.format(time * scale, units)


def pick_time_units(time):
    """Return (scale, unit): (1000, 'ms') when |time| < 1, else (1, 's')."""
    assert not isinstance(time, np.ndarray), 'Invalid scalar: {}'.format(time)
    if abs(time) < 1:
        return 1000, 'ms'
    else:
        return 1, 's'
def show(obs):
    """Pop up the given image array in the default viewer (debugging aid)."""
    image = Image.fromarray(obs)
    image.show()


class AtariMatcher(object):
    """Compares raw Atari frames against frames received over VNC."""

    def translator(self, env):
        return translator.AtariTranslator(env)

    def crop(self, obs):
        # Keep rows 20..209 and the first 160 columns of the frame.
        return obs[20:210, :160, :]

    def assert_match(self, obs, vnc_obs, extra_info=None, stage=None):
        """Fail (after displaying both frames and their difference) on any pixel mismatch."""
        # Crop out the mouse
        vnc_obs_cropped = self.crop(vnc_obs)
        obs_cropped = self.crop(obs)

        if np.all(vnc_obs_cropped == obs_cropped):
            return

        for frame in (vnc_obs_cropped, obs_cropped, vnc_obs_cropped - obs_cropped):
            show(frame)
        assert False, '[{}] Observations do not match: vnc_obs_cropped={} obs_cropped={} extra_info={}'.format(stage, vnc_obs_cropped, obs_cropped, extra_info)


# Wraps an Atari-over-VNC env so that it behaves like a vectorized vanilla Atari env
def atari_vnc_wrapper(env):
    return wrappers.GymCoreAction(wrappers.Vision(env))


class CartPoleLowDMatcher(object):
    """Compares low-dimensional CartPole observations up to float tolerance."""

    def translator(self, env):
        return translator.CartPoleTranslator(env)

    def assert_match(self, obs, vnc_obs, extra_info=None, stage=None):
        close = np.isclose(obs, vnc_obs)
        assert np.all(close), '[{}] Observations do not match: vnc_obs={} obs={}'.format(stage, vnc_obs, obs)


def reset(matcher, env, vnc_env, stage=None):
    """Reset both envs and check that their first observations agree."""
    local_obs = env.reset()
    remote_obs = vnc_env.reset()
    matcher.assert_match(local_obs, remote_obs, stage=stage)


def rollout(matcher, env, vnc_env, timestep_limit=None, stage=None):
    """Step both envs with the same sampled actions and compare every result.

    Stops at the end of the episode or after ``timestep_limit`` steps.
    """
    steps_taken = 0
    actions = matcher.translator(env)

    done = None
    while True:
        action = env.action_space.sample()

        obs, reward, done, info = env.step(action)
        if done:
            # Account for remote auto-reset
            obs = env.reset()

        vnc_obs, vnc_reward, vnc_done, vnc_info = vnc_env.step(action)
        assert reward == vnc_reward
        assert done == vnc_done
        assert vnc_info['stats.reward.count'] == 1
        matcher.assert_match(obs, vnc_obs, {'reward': reward, 'done': done}, stage=stage)

        steps_taken += 1
        limit_reached = timestep_limit is not None and steps_taken >= timestep_limit
        if done or limit_reached:
            break


# TODO: we should have auto-env spinup
specs = [
    (gym.spec('gym-core.PongDeterministicSync-v3'), AtariMatcher(), atari_vnc_wrapper),
    (gym.spec('gym-core.PitfallDeterministicSync-v3'), AtariMatcher(), atari_vnc_wrapper),

    # This test is still broken. Looks like we're not piping the seed
    # to the CartPole env behind VNC
    # (gym.spec('gym-core.CartPoleLowDSync-v0'), CartPoleLowDMatcher())
]


@pytest.mark.parametrize("spec,matcher,wrapper", specs)
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    # Check that when running over VNC or using the raw environment,
    # semantics match exactly.
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    spaces.seed(0)

    vnc_env = spec.make()
    if vnc_env.metadata.get('configure.required', False):
        vnc_env.configure(remotes=1)
    vnc_env = wrappers.Unvectorize(wrapper(vnc_env))

    env = gym.make(spec._kwargs['gym_core_id'])

    for target in (env, vnc_env):
        target.seed(0)

    # Check that reset observations work
    reset(matcher, env, vnc_env, stage='initial reset')

    # Check a full rollout
    rollout(matcher, env, vnc_env, timestep_limit=50, stage='50 steps')

    # Reset to start a new episode
    reset(matcher, env, vnc_env, stage='reset to new episode')

    # Check that a step into the next episode works
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='1 step in new episode')

    # Make sure env can be reseeded
    for target in (env, vnc_env):
        target.seed(1)
    reset(matcher, env, vnc_env, 'reseeded reset')
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='reseeded step')
PYGAME_INSTALLED = None
def load_pygame():
    """Try to import pygame once and record the outcome in PYGAME_INSTALLED.

    On success the module is bound to the global name ``pygame``. Later
    calls are no-ops.
    """
    global PYGAME_INSTALLED, pygame
    if PYGAME_INSTALLED is not None:
        return

    try:
        import pygame
        PYGAME_INSTALLED = True
    except ImportError:
        PYGAME_INSTALLED = False

logger = logging.getLogger(__name__)


class LibVNCSession(object):
    """VNC session backed by the ``libvncdriver`` extension, with optional pygame rendering."""

    def __init__(self, remotes, error_buffer, encoding=None, compress_level=None, fine_quality_level=None, subsample_level=None):
        """compress_level: 0-9 [9 is highest compression]
        fine_quality_level: 0-100 [100 is best quality]
        subsample_level: 0-3 [0 is best quality]

        Any setting left as None is read from the environment
        (LIBVNC_ENCODING, LIBVNC_COMPRESS_LEVEL, LIBVNC_FINE_QUALITY_LEVEL,
        LIBVNC_SUBSAMPLE_LEVEL), defaulting to 'tight', 0, 100 and 0.

        Lots of references for this, but
        https://github.com/TurboVNC/turbovnc/blob/master/doc/performance.txt
        is decent.
        """

        load_pygame()
        import libvncdriver

        if encoding is None:
            encoding = os.environ.get('LIBVNC_ENCODING', 'tight')
        if compress_level is None:
            compress_level = int(os.environ.get('LIBVNC_COMPRESS_LEVEL', '0'))
        if fine_quality_level is None:
            fine_quality_level = int(os.environ.get('LIBVNC_FINE_QUALITY_LEVEL', '100'))
        if subsample_level is None:
            subsample_level = int(os.environ.get('LIBVNC_SUBSAMPLE_LEVEL', '0'))

        if not hasattr(libvncdriver, 'VNCSession'):
            raise error.Error('''
 *=================================================*
|| libvncdriver is not installed                   ||
|| Try installing with "pip install libvncdriver"  ||
|| or use the go or python driver by setting       ||
|| UNIVERSE_VNCDRIVER=go                           ||
|| UNIVERSE_VNCDRIVER=py                           ||
 *=================================================*''')
        logger.info("Using libvncdriver's %s encoding", encoding)
        self.driver = libvncdriver.VNCSession(
            remotes=remotes,
            error_buffer=error_buffer,
            encoding=encoding,
            compress_level=compress_level,
            fine_quality_level=fine_quality_level,
            subsample_level=subsample_level,
        )
        self.screen = None
        self.render_called_once = False
        if PYGAME_INSTALLED:
            pygame.init()

    def flip(self):
        """Return the driver's ``flip()`` result."""
        return self._guard(self.driver.flip)

    def step(self, action):
        """Forward ``action`` to the driver and return its result."""
        return self.driver.step(action)

    def render(self):
        self._guard(self._render)

    def _guard(self, fn):
        """Call ``fn``; on KeyboardInterrupt/SystemExit close the session first.

        The exception is re-raised after cleanup. Swallowing it would hide
        Ctrl-C from the caller and make ``flip`` silently return None.
        """
        try:
            return fn()
        except (KeyboardInterrupt, SystemExit):
            self.close()
            raise

    def _render(self):
        """Draw the first remote's current frame into a pygame window (no-op without pygame)."""
        self.before_render()
        if not PYGAME_INSTALLED:
            return
        # For some reason pygame wants X and Y swapped
        frames, _ = self.driver.flip()
        if self.screen is None:
            self.screen = pygame.display.set_mode(frames[0].shape[:2][::-1])
        surf = pygame.surfarray.make_surface(frames[0].swapaxes(0, 1))
        rect = surf.get_rect()
        self.screen.blit(surf, rect)
        pygame.display.flip()

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.close()

    def before_render(self):
        """On the first render only, warn if pygame is unavailable."""
        if not self.render_called_once:
            self.render_called_once = True
            if not PYGAME_INSTALLED:
                logger.warning('''
 *================================================================*
||                                                                ||
|| Rendering disabled when using libvnc without pygame installed. ||
|| Consider viewing over VNC or running "pip install pygame".     ||
||                                                                ||
 *================================================================*''')

    def close(self):
        """Shut down pygame (when installed) and close the driver."""
        if PYGAME_INSTALLED:
            pygame.quit()
        self.driver.close()
class AtariKeyState(object):
    """
    Converts from VNCEvents to an Atari-v0 action index

    spaces.KeyEvent only carries a keyboard diff, so the full set of pressed
    keys is persisted here in order to map VNCEvents to an action index.
    """
    def __init__(self, env):
        self._translator = AtariTranslator(env)
        self._down_keysyms = set()  # Assumes that your env starts with no keys pressed down

    def apply_vnc_actions(self, vnc_actions):
        """
        Play a list of vnc_actions forward over the current keysyms state

        NOTE: Since we are squashing a set of diffs into a single keyboard state, some information may be lost.
        For example if the Z key is down, then we receive [(Z-up), (Z-down)], the output will not reflect any change in Z
        You can make each frame shorter to offset this effect.
        """
        key_events = (e for e in vnc_actions if isinstance(e, spaces.KeyEvent))
        for event in key_events:
            update = self._down_keysyms.add if event.down else self._down_keysyms.discard
            update(event.key)

        logger.debug("AtariKeyState._down_keysyms: {}".format(self._down_keysyms))

    def to_keysyms(self):
        """Returns the current state as keysyms"""
        return list(self._down_keysyms)

    def to_index(self):
        """Returns the current state as an index"""
        pressed = self.to_keysyms()
        return self._translator.keysyms_to_index(pressed)


class AtariTranslator(object):
    """Translates Atari actions to and from various formats"""
    _all_keysyms = [key.UP, key.DOWN, key.LEFT, key.RIGHT, key.Z]

    # (action-name fragment, keysym) in the order fragments appear in a name
    _name_parts = [
        ('UP', key.UP),
        ('DOWN', key.DOWN),
        ('LEFT', key.LEFT),
        ('RIGHT', key.RIGHT),
        ('FIRE', key.Z),
    ]

    def __init__(self, env):
        meanings = env.unwrapped.get_action_meanings()
        # e.g. {0: 'NOOP', 1: 'FIRE', 2: 'RIGHT', 3: 'LEFT', 4: 'RIGHTFIRE', 5: 'LEFTFIRE'}
        self._index_to_name_ = {}
        # e.g. {'RIGHT': 2, 'FIRE': 1, 'RIGHTFIRE': 4, 'LEFTFIRE': 5, 'NOOP': 0, 'LEFT': 3}
        self._name_to_index_ = {}

        for index, meaning in enumerate(meanings):
            self._index_to_name_[index] = meaning
            self._name_to_index_[meaning] = index

    def keysyms_to_vnc_actions(self, keysyms):
        """Return one KeyEvent per known key, down iff that key is in ``keysyms``."""
        pressed = set(keysyms)
        return [spaces.KeyEvent(keysym, down=(keysym in pressed))
                for keysym in self._all_keysyms]

    def keysyms_to_index(self, keysyms):
        return self._name_to_index(self._keysyms_to_name(keysyms))

    def index_to_keysyms(self, i):
        """Return the keysyms whose name fragment occurs in action ``i``'s name."""
        name = self._index_to_name(i)
        return [keysym for fragment, keysym in self._name_parts if fragment in name]

    def _name_to_index(self, name):
        # Names that are not known actions fall back to index 0.
        return self._name_to_index_.get(name, 0)

    def _index_to_name(self, i):
        return self._index_to_name_[i]

    def _keysyms_to_name(self, keysyms):
        """Concatenate the name fragments of every pressed key, in canonical order."""
        return ''.join(fragment for fragment, keysym in self._name_parts if keysym in keysyms)


class CartPoleTranslator(object):
    """Two-action translator: LEFT pressed is action 0, anything else is action 1."""

    def __init__(self, env):
        pass

    def keysyms_to_vnc_actions(self, keysyms):
        left_pressed = key.LEFT in keysyms
        return [spaces.KeyEvent(key.LEFT, down=left_pressed)]

    def keysyms_to_index(self, keys):
        return 0 if key.LEFT in keys else 1

    def index_to_keysyms(self, i):
        return [key.LEFT] if i == 0 else []
-------------------------------------------------------------------------------- /universe/vncdriver/constants.py: -------------------------------------------------------------------------------- 1 | # Encodings 2 | RAW_ENCODING = 0 3 | COPY_RECTANGLE_ENCODING = 1 4 | RRE_ENCODING = 2 5 | CORRE_ENCODING = 4 6 | HEXTILE_ENCODING = 5 7 | ZLIB_ENCODING = 6 8 | TIGHT_ENCODING = 7 9 | ZLIBHEX_ENCODING = 8 10 | ZRLE_ENCODING = 16 11 | #0xffffff00 to 0xffffffff tight options 12 | PSEUDO_CURSOR_ENCODING = -239 13 | 14 | # Keycodes 15 | KEY_BackSpace = 0xff08 16 | KEY_Tab = 0xff09 17 | KEY_Return = 0xff0d 18 | KEY_Escape = 0xff1b 19 | KEY_Insert = 0xff63 20 | KEY_Delete = 0xffff 21 | KEY_Home = 0xff50 22 | KEY_End = 0xff57 23 | KEY_PageUp = 0xff55 24 | KEY_PageDown = 0xff56 25 | KEY_Left = 0xff51 26 | KEY_Up = 0xff52 27 | KEY_Right = 0xff53 28 | KEY_Down = 0xff54 29 | KEY_F1 = 0xffbe 30 | KEY_F2 = 0xffbf 31 | KEY_F3 = 0xffc0 32 | KEY_F4 = 0xffc1 33 | KEY_F5 = 0xffc2 34 | KEY_F6 = 0xffc3 35 | KEY_F7 = 0xffc4 36 | KEY_F8 = 0xffc5 37 | KEY_F9 = 0xffc6 38 | KEY_F10 = 0xffc7 39 | KEY_F11 = 0xffc8 40 | KEY_F12 = 0xffc9 41 | KEY_F13 = 0xFFCA 42 | KEY_F14 = 0xFFCB 43 | KEY_F15 = 0xFFCC 44 | KEY_F16 = 0xFFCD 45 | KEY_F17 = 0xFFCE 46 | KEY_F18 = 0xFFCF 47 | KEY_F19 = 0xFFD0 48 | KEY_F20 = 0xFFD1 49 | KEY_ShiftLeft = 0xffe1 50 | KEY_ShiftRight = 0xffe2 51 | KEY_ControlLeft = 0xffe3 52 | KEY_ControlRight = 0xffe4 53 | KEY_MetaLeft = 0xffe7 54 | KEY_MetaRight = 0xffe8 55 | KEY_AltLeft = 0xffe9 56 | KEY_AltRight = 0xffea 57 | 58 | KEY_Scroll_Lock = 0xFF14 59 | KEY_Sys_Req = 0xFF15 60 | KEY_Num_Lock = 0xFF7F 61 | KEY_Caps_Lock = 0xFFE5 62 | KEY_Pause = 0xFF13 63 | KEY_Super_L = 0xFFEB 64 | KEY_Super_R = 0xFFEC 65 | KEY_Hyper_L = 0xFFED 66 | KEY_Hyper_R = 0xFFEE 67 | 68 | KEY_KP_0 = 0xFFB0 69 | KEY_KP_1 = 0xFFB1 70 | KEY_KP_2 = 0xFFB2 71 | KEY_KP_3 = 0xFFB3 72 | KEY_KP_4 = 0xFFB4 73 | KEY_KP_5 = 0xFFB5 74 | KEY_KP_6 = 0xFFB6 75 | KEY_KP_7 = 0xFFB7 76 | KEY_KP_8 = 0xFFB8 77 | KEY_KP_9 = 
0xFFB9 78 | KEY_KP_Enter = 0xFF8D 79 | 80 | KEY_ForwardSlash = 0x002F 81 | KEY_BackSlash = 0x005C 82 | KEY_SpaceBar= 0x0020 83 | 84 | # TODO: build this programmatically? 85 | KEYMAP = { 86 | 'bsp': KEY_BackSpace, 87 | 'tab': KEY_Tab, 88 | 'return': KEY_Return, 89 | 'enter': KEY_Return, 90 | 'esc': KEY_Escape, 91 | 'ins': KEY_Insert, 92 | 'delete': KEY_Delete, 93 | 'del': KEY_Delete, 94 | 'home': KEY_Home, 95 | 'end': KEY_End, 96 | 'pgup': KEY_PageUp, 97 | 'pgdn': KEY_PageDown, 98 | 'ArrowLeft': KEY_Left, 99 | 'left': KEY_Left, 100 | 'ArrowUp': KEY_Up, 101 | 'up': KEY_Up, 102 | 'ArrowRight': KEY_Right, 103 | 'right': KEY_Right, 104 | 'ArrowDown': KEY_Down, 105 | 'down': KEY_Down, 106 | 107 | 'slash': KEY_BackSlash, 108 | 'bslash': KEY_BackSlash, 109 | 'fslash': KEY_ForwardSlash, 110 | 'spacebar': KEY_SpaceBar, 111 | 'space': KEY_SpaceBar, 112 | 'sb': KEY_SpaceBar, 113 | 114 | 'f1': KEY_F1, 115 | 'f2': KEY_F2, 116 | 'f3': KEY_F3, 117 | 'f4': KEY_F4, 118 | 'f5': KEY_F5, 119 | 'f6': KEY_F6, 120 | 'f7': KEY_F7, 121 | 'f8': KEY_F8, 122 | 'f9': KEY_F9, 123 | 'f10': KEY_F10, 124 | 'f11': KEY_F11, 125 | 'f12': KEY_F12, 126 | 'f13': KEY_F13, 127 | 'f14': KEY_F14, 128 | 'f15': KEY_F15, 129 | 'f16': KEY_F16, 130 | 'f17': KEY_F17, 131 | 'f18': KEY_F18, 132 | 'f19': KEY_F19, 133 | 'f20': KEY_F20, 134 | 135 | 'lshift': KEY_ShiftLeft, 136 | 'shift': KEY_ShiftLeft, 137 | 'rshift': KEY_ShiftRight, 138 | 'lctrl': KEY_ControlLeft, 139 | 'ctrl': KEY_ControlLeft, 140 | 'rctrl': KEY_ControlRight, 141 | 'lmeta': KEY_MetaLeft, 142 | 'meta': KEY_MetaLeft, 143 | 'rmeta': KEY_MetaRight, 144 | 'lalt': KEY_AltLeft, 145 | 'alt': KEY_AltLeft, 146 | 'ralt': KEY_AltRight, 147 | 'scrlk': KEY_Scroll_Lock, 148 | 'sysrq': KEY_Sys_Req, 149 | 'numlk': KEY_Num_Lock, 150 | 'caplk': KEY_Caps_Lock, 151 | 'pause': KEY_Pause, 152 | 'lsuper': KEY_Super_L, 153 | 'super': KEY_Super_L, 154 | 'rsuper': KEY_Super_R, 155 | 'lhyper': KEY_Hyper_L, 156 | 'hyper': KEY_Hyper_L, 157 | 'rhyper': KEY_Hyper_R, 158 | 159 | 
def interpret_ready(pod):
    """Return True iff the pod's first 'Ready' condition has status 'True'.

    A pod without a ``conditions`` list, or without any condition of type
    'Ready', is reported as not ready. Example of the inspected structure:

        status:
          conditions:
          - lastProbeTime: null
            lastTransitionTime: 2016-07-06T05:29:45Z
            message: 'containers with unready status: [xdummy xvnc vnc-atari]'
            reason: ContainersNotReady
            status: "False"
            type: Ready
    """
    status = pod['status']
    if 'conditions' not in status:
        return False

    matches = [cond for cond in status['conditions'] if cond['type'] == 'Ready']
    # Kubernetes reports the condition status as the strings "True"/"False".
    return bool(matches) and matches[0]['status'] == 'True'
class VNCEnvDiscovery(object):
    """Discover universe remotes by querying kubectl for labelled pods.

    The kubectl context ('sci') and namespace ('gym') are fixed at
    construction time and prepended to every kubectl invocation.
    """

    def __init__(self):
        self.context = 'sci'
        self.namespace = 'gym'
        self.kubectl = ['kubectl', '--context', self.context, '--namespace', self.namespace]

    def discover_batches(self):
        """Return ``{batch_label: {'count': n}}`` for all live universe pods.

        Pods that carry a ``deletionTimestamp`` are being torn down and are
        not counted.
        """
        output = check_with_output(self.kubectl + ['get', 'pods', '-o', 'json', '-l', 'type=universe'])
        pods = json.loads(output)

        batches = {}
        for pod in pods['items']:
            if 'deletionTimestamp' in pod['metadata']:
                # Pod has been deleted!
                continue

            batch = pod['metadata']['labels']['batch']
            entry = batches.setdefault(batch, {'count': 0})
            entry['count'] += 1
        return batches

    def discover(self, batch, force_ready=False):
        """Return a list of ``{'name', 'address', 'ready'}`` dicts for *batch*.

        ``address`` is ``node:vnc_port`` with ``+rewarder_port`` appended when
        the pod exposes a rewarder port. Pods not yet scheduled on a node are
        skipped, unless *force_ready* is true, in which case ``Error`` is
        raised. ``Error`` is also raised when no pod matches the batch id.
        """
        # kubectl's -l/--selector is a single-valued flag: passing it twice
        # keeps only the last value. Both requirements therefore have to go
        # into one comma-separated selector, otherwise the type=universe
        # filter is silently dropped.
        selector = 'type=universe,batch={}'.format(batch)
        output = check_with_output(self.kubectl + ['get', 'pods', '-o', 'json', '-l', selector])
        pods = json.loads(output)

        if len(pods['items']) == 0:
            raise Error('Incorrect batch id: {}'.format(batch))

        remotes = []

        for pod in pods['items']:
            name = pod['metadata']['name']
            containers = {}
            for container in pod['spec']['containers']:
                containers[container['name']] = container
            vnc_port, rewarder_port = interpret_ports(containers)
            node = pod['spec'].get('nodeName')

            # Not scheduled on a node yet
            if node is None:
                if force_ready:
                    raise Error('Not all pods ready: {} is not scheduled on a node yet'.format(name))
                continue

            address = '{}:{}'.format(node, vnc_port)
            if rewarder_port is not None:
                address += '+{}'.format(rewarder_port)
            remotes.append({
                'name': name,
                'address': address,
                'ready': interpret_ready(pod),
            })
        return remotes
class GymCoreSyncEnv(GymCoreEnv):
    """A synchronized version of the core envs. Its semantics should match
    that of the core envs. (By default, observations are pixels from
    the VNC session, but it also supports receiving the normal Gym
    observations over the rewarder socket.)

    Provided primarily for testing and debugging.
    """

    def __init__(self, gym_core_id, fps=60, vnc_pixels=True):
        super(GymCoreSyncEnv, self).__init__(gym_core_id, fps=fps)
        # Metadata has already been cloned
        self.metadata['semantics.async'] = False

        self.gym_core_id = gym_core_id
        self.vnc_pixels = vnc_pixels

        # A local core env is only needed to decode JSON observations that
        # arrive over the rewarder socket.
        if not vnc_pixels:
            self._core_env = gym.spec(gym_core_id).make()
        else:
            self._core_env = None

    def _flip_past(self, when_n):
        """Flip the VNC session until every observation is at least as new
        as the corresponding remote timestamp in *when_n*.

        Returns ``(observation_n, info_n)``; *info_n* accumulates the
        per-flip observation info via ``_propagate_obs_info``.
        """
        info_n = [{} for i in range(self.n)]
        while True:
            observation_n, obs_info_n = self.vnc_session.flip()
            metadata_n = self.diagnostics.extract_metadata(observation_n)

            # Save the update count
            self._propagate_obs_info(info_n, obs_info_n)

            # All remote times, so no clock skew adjustments needed
            invalid = []
            for i, (metadata, when) in enumerate(zip(metadata_n, when_n)):
                delta = when - metadata.get('now', 0)
                if delta > 0:
                    invalid.append((i, delta))
            if not invalid:
                break

            # At least one observation is still stale: wait one frame.
            tick = 1./self.metadata['video.frames_per_second']
            logger.info('Waiting %sms for the following observations to catch up: %s', int(1000*tick), invalid)
            time.sleep(tick)
        return observation_n, info_n

    def _reset(self):
        assert self.rewarder_session

        result = self.rewarder_session.reset(
            seed=self._seed_value,
        )
        # Clear seed value so we don't double-send it
        self._seed_value = None

        # Wait until all the observations have passed the reset_time
        remote_reset_time = [response['headers']['sent_at'] for _, _, response in result]
        observation_n, _ = self._flip_past(remote_reset_time)

        # Double check that our reward queue is empty
        assert all(c == 0 for c in self.rewarder_session.rewards_count())

        return self._observation(observation_n)

    def _observation(self, observation_n):
        """Return VNC pixels, or the decoded rewarder observations when
        ``vnc_pixels`` is False."""
        if self.vnc_pixels:
            return observation_n
        else:
            observation_n = self.rewarder_session.pop_observation()
            assert all(observation is not None for observation in observation_n), 'At least one missing observation: {}'.format(observation_n)
            return self._core_env.observation_space.from_jsonable(observation_n)

    def _step(self, action_n):
        # Add C keypress in order to "commit" the action, as
        # interpreted by the remote.
        action_n = [action + [
            spaces.KeyEvent.by_name('c', down=True),
            spaces.KeyEvent.by_name('c', down=False)
        ] for action in action_n]
        # Submit directly to VNC session, without popping rewards
        logger.debug('Submitting actions: %s', action_n)
        action_n = self._compile_actions(action_n)
        # The observation returned here is discarded; a fresh one is taken
        # by _flip_past below once the action has taken effect.
        self.vnc_session.step(action_n)
        # Wait until the actions have actually happened
        self.rewarder_session.wait(timeout=5)

        # TODO: this is now present in the info messages; need to
        # update the implementation.
        when_n = [reward_buffer.info['reward_buffer.remote_time'] for reward_buffer in self.rewarder_session.reward_buffers]
        observation_n, obs_info_n = self._flip_past(when_n)

        reward_n, reward_time_n, done_n, info_n = self.rewarder_session.pop()
        self._propagate_obs_info(info_n, obs_info_n)

        # Warn if we detect multiple rewards
        if any(info['stats.reward.count'] != 1 for info in info_n):
            # Arrived but there was a bug
            logger.warning('Likely bug: should have received 1 reward for every env, but instead received %s. Current return: observation=%s reward=%s done=%s info=%s', [info['stats.reward.count'] for info in info_n], observation_n, reward_n, done_n, info_n)

        return self._observation(observation_n), reward_n, done_n, {'n': info_n}
22 | password = query.get('password', [utils.default_password()])[0] 23 | vnc_addresses, rewarder_addresses = parse_remotes(addresses) 24 | res = cls(vnc_addresses, rewarder_addresses, vnc_password=password, rewarder_password=password, **kwargs) 25 | return res, res.available_n 26 | 27 | def __init__(self, vnc_addresses, rewarder_addresses, vnc_password, rewarder_password, start_timeout=None): 28 | if vnc_addresses is not None: 29 | self.available_n = len(vnc_addresses) 30 | elif rewarder_addresses is not None: 31 | self.available_n = len(rewarder_addresses) 32 | else: 33 | assert False 34 | 35 | self.supports_reconnect = False 36 | self.connect_vnc = vnc_addresses is not None 37 | self.connect_rewarder = rewarder_addresses is not None 38 | if rewarder_addresses is None: 39 | logger.info("No rewarder addresses were provided, so this env cannot connect to the remote's rewarder channel, and cannot send control messages (e.g. reset)") 40 | 41 | self.vnc_addresses = vnc_addresses 42 | self.vnc_password = vnc_password 43 | self.rewarder_addresses = rewarder_addresses 44 | self.rewarder_password = rewarder_password 45 | if start_timeout is None: 46 | start_timeout = 2 * self.available_n + 5 47 | self.start_timeout = start_timeout 48 | 49 | self._popped = False 50 | 51 | def pop(self, n=None): 52 | if self._popped: 53 | assert n is None 54 | return [] 55 | self._popped = True 56 | 57 | remotes = [] 58 | for i in range(self.available_n): 59 | if self.vnc_addresses is not None: 60 | vnc_address = self.vnc_addresses[i] 61 | else: 62 | vnc_address = None 63 | 64 | if self.rewarder_addresses is not None: 65 | rewarder_address = self.rewarder_addresses[i] 66 | else: 67 | rewarder_address = None 68 | 69 | name = self._handles[i] 70 | env = remote.Remote( 71 | handle=self._handles[i], 72 | vnc_address=vnc_address, 73 | vnc_password=self.vnc_password, 74 | rewarder_address=rewarder_address, 75 | rewarder_password=self.rewarder_password, 76 | ) 77 | remotes.append(env) 78 | return 
def parse_remotes(remotes):
    """Split remote specs into VNC and rewarder address lists.

    Each remote has the form::

        address:vnc_port+rewarder_port   (e.g. localhost:5900+15900)

    Either vnc_port or rewarder_port can be omitted, but not both, and the
    choice must be the same for every remote in the list.

    Returns:
        ``(vnc_addresses, rewarder_addresses)``: lists of ``host:port``
        strings, with ``None`` in place of a list that would be empty.

    Raises:
        error.Error: on a malformed port or hostname, on remotes that mix
            the two forms, or when neither port kind is given.
    """
    # Tri-state flags: None until the first remote has been seen, then
    # True/False depending on whether remotes carry that kind of port.
    has_rewarder = None
    has_vnc = None

    vnc_addresses = []
    rewarder_addresses = []

    for spec in remotes:
        # Parse off +, then :
        # partition() splits on the first separator only, so a spec with a
        # repeated separator fails the port check below with error.Error
        # instead of a bare ValueError from tuple unpacking.
        rest, plus, rewarder_port = spec.partition('+')
        if plus:
            if has_rewarder is False:
                raise error.Error('Either all or no remotes must have rewarders: {}'.format(remotes))
            has_rewarder = True

            if not re.match(r'^[0-9]+$', rewarder_port):
                raise error.Error('Rewarder port must be an integer, not `{}`: {}'.format(rewarder_port, remotes))
            rewarder_port = int(rewarder_port)
        else:
            if has_rewarder is True:
                raise error.Error('Either all or no remotes must have rewarders: {}'.format(remotes))
            has_rewarder = False
            rewarder_port = None

        host, colon, vnc_port = rest.partition(':')
        if colon:
            if has_vnc is False:
                raise error.Error('Either all or no remotes must have a VNC port: {}'.format(remotes))
            has_vnc = True

            if not re.match(r'^[0-9]+$', vnc_port):
                raise error.Error('VNC port must be an integer, not `{}`: {}'.format(vnc_port, remotes))
            vnc_port = int(vnc_port)
        else:
            if has_vnc is True:
                raise error.Error('Either all or no remotes must have a VNC port: {}'.format(remotes))
            has_vnc = False
            vnc_port = None

        if not re.match(r'^[-a-zA-Z0-9\.\_]+$', host):
            raise error.Error('Invalid hostname for remote: {}'.format(remotes))

        if rewarder_port is not None:
            rewarder_addresses.append('{}:{}'.format(host, rewarder_port))

        if vnc_port is not None:
            vnc_addresses.append('{}:{}'.format(host, vnc_port))

    # NOTE(review): placed after the loop so an empty `remotes` list is
    # rejected too; confirm this matches the original nesting.
    if not has_rewarder and not has_vnc:
        raise error.Error('You must provide either rewarder or a VNC port: {}'.format(remotes))

    if not vnc_addresses:
        vnc_addresses = None
    if not rewarder_addresses:
        rewarder_addresses = None
    return vnc_addresses, rewarder_addresses
def one_hot(indices, depth):
    """Return one-hot rows of width *depth* for *indices*.

    Row ``k`` of the result is the ``indices[k]``-th row of the
    ``depth x depth`` identity matrix.
    """
    identity = np.eye(depth)
    return identity[indices]
class GymCoreObservation(vectorized.Wrapper):
    """Replace observations with the Gym observations sent over the rewarder.

    Each step, the ``rewarder.observation`` entry is popped from every env's
    info dict and decoded with the core env's observation space. Envs with
    no such entry get ``None`` as their observation.
    """

    def __init__(self, env, gym_core_id=None):
        super(GymCoreObservation, self).__init__(env)

        if gym_core_id is None:
            # self.spec is None while inside of the make, so we need
            # to pass gym_core_id in explicitly there. This case will
            # be hit when instantiating by hand.
            gym_core_id = self.spec._kwargs['gym_core_id']

        # Reward/done/info left over from the extra step taken in _reset,
        # to be merged into the next _step. These must use the same names
        # that _reset/_step read and write; initialising differently named
        # attributes made step() before reset() raise AttributeError.
        self.reward_n = None
        self.done_n = None
        self.info = None

        self._gym_core_env = gym.spec(gym_core_id).make()

    def _reset(self):
        observation_n = self.env.reset()
        self.reward_n = [0] * self.n
        self.done_n = [False] * self.n
        self.info = {'n': [{} for _ in range(self.n)]}
        # Take one empty step and merge its results into the values cached
        # above; only the merged done/info are used to build the observation.
        new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for i in range(self.n)])
        rewarder.merge_n(
            observation_n, self.reward_n, self.done_n, self.info,
            new_observation_n, new_reward_n, new_done_n, new_info
        )
        return self._observation(self.done_n, self.info)

    def _step(self, action_n):
        observation_n, reward_n, done_n, info = self.env.step(action_n)
        if self.reward_n is not None:
            # First step after a reset: fold in what _reset left behind.
            rewarder.merge_n(
                observation_n, reward_n, done_n, info,
                [None] * self.n, self.reward_n, self.done_n, self.info,
            )
            self.reward_n = self.done_n = self.info = None
        return self._observation(done_n, info), reward_n, done_n, info

    def _observation(self, done_n, info):
        """Build the observation list from ``info['n'][i]['rewarder.observation']``."""
        missing = set()

        observation_n = [None] * self.n
        for i, (done, info_i) in enumerate(zip(done_n, info['n'])):
            rewarder_observation = info_i.pop('rewarder.observation', None)
            if rewarder_observation is not None:
                observation, episode_id = rewarder_observation
                observation_n[i] = self._gym_core_env.observation_space.from_jsonable(observation)

                if done:
                    # Check whether we should mask
                    completed = info_i['env_status.completed_episode_id']
                    # Observation from old!
                    if episode_id == completed:
                        logger.debug('[%d] Masking rewarder_observation on episode boundary', i)
                        observation_n[i] = None
            else:
                missing.add(i)

        if len(missing) > 0:
            logger.debug('Missing rewarder observations: %s', missing)
        return observation_n
def _step(self, action_n):
    """Submit *action_n* immediately, then idle until the next frame is due.

    While idling, the env is re-stepped with empty (or peek-only) actions
    and each result is merged into the accumulated
    observation/reward/done/info, which is what gets returned. When
    ``self.fps`` is None the first substep's result is returned unthrottled.
    """
    if self._steps is None:
        self._start_timer()
    self._steps += 1

    accum_observation_n, accum_reward_n, accum_done_n, accum_info = self._substep(action_n)
    accum_info['throttle.action.available_at'] = time.time()

    # Record which indexes we were just peeking at, so when we
    # make the follow-up we'll be sure to peek there too.
    #
    # Each event must be compared against PeekReward: testing the
    # PeekReward object itself is always truthy and would mark every
    # non-empty action as a peek, so its rewards would never be popped
    # by the follow-up substeps.
    peek_n = [any(peek == spaces.PeekReward for peek in action) for action in action_n]

    if self.fps is None:
        return accum_observation_n, accum_reward_n, accum_done_n, accum_info

    accum_info['stats.throttle.sleep'] = 0
    while True:
        # See how much time we have to idle
        delta = self._start + 1./self.fps * self._steps - time.time()

        if delta < 0:
            # We're out of time. Just get out of here.
            delta = abs(delta)
            if delta >= 1:
                logger.info('Throttle fell behind by %.2fs; lost %.2f frames', delta, self.fps*delta)
            # NOTE(review): the source this was reconstructed from had lost
            # its indentation; confirm the next two statements run on every
            # out-of-time exit rather than only when delta >= 1.
            pyprofile.timing('vnc_env.Throttle.lost_sleep', delta)
            self._start_timer()
            break
        else:
            # We've got plenty of time. Sleep for up to 16ms, and
            # then refresh our current frame. We need to
            # constantly be calling step so that our lags are
            # reported correctly, within 16ms. (The layering is
            # such that the vncdriver doesn't know which pixels
            # correspond to metadata, and the diagnostics don't
            # know when pixels first got painted. So we do our
            # best to present frames as they're ready to the
            # diagnostics.)
            delta = min(delta, 0.016)
            pyprofile.timing('vnc_env.Throttle.sleep', delta)
            accum_info['stats.throttle.sleep'] += delta
            time.sleep(delta)

            # We want to merge in the latest reward/done/info so that our
            # agent has the most up-to-date info post-sleep, but also want
            # to avoid popping any rewards where done=True (since we'd
            # have to merge across episode boundaries).
            #
            # NOTE(review): reconstructed as part of the sleep branch, i.e.
            # one follow-up substep per sleep; confirm against the
            # original nesting.
            action_n = []
            for done, peek in zip(accum_done_n, peek_n):
                if done or peek:
                    # No popping of reward/done
                    action_n.append([spaces.PeekReward])
                else:
                    action_n.append([])

            observation_n, reward_n, done_n, info = self._substep(action_n)

            # Merge observation, rewards and metadata.
            # Text observation has order in which the messages are sent.
            rewarder.merge_n(
                accum_observation_n, accum_reward_n, accum_done_n, accum_info,
                observation_n, reward_n, done_n, info,
            )

    return accum_observation_n, accum_reward_n, accum_done_n, accum_info
def _register_vnc(self, address, start_time=None):
    """Connect to the VNC server at *address* and register the socket.

    Connection failures that are expected while a remote is still coming
    up (ECONNREFUSED, ETIMEDOUT, address resolution errors) are retried
    once per second until ``self.start_timeout`` seconds have elapsed since
    *start_time*. Any other failure, or any failure when no start timeout
    is configured, is re-raised immediately.

    Raises:
        error.Error: if the server did not come up within the start timeout.
    """
    if start_time is None:
        start_time = time.time()

    host, port = host_port(address, default_port=5900)

    while True:
        # In VNC, the server sends bytes upon connection
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((host, port))
        except (socket.error, socket.gaierror) as e:
            # This socket is never used again: close it so that retries
            # do not leak one file descriptor per failed attempt.
            sock.close()
            # ECONNREFUSED: VNC env hasn't come up yet
            # ETIMEDOUT: the packets can't be delivered yet, such as can happen on kubernetes
            # gaierror: can't resolve the address yet, which can also happen on kubernetes
            expected = e.errno in (errno.ECONNREFUSED, errno.ETIMEDOUT) or isinstance(e, socket.gaierror)
            if self.start_timeout is None or not expected:
                reraise(suffix='while connecting to VNC server {}'.format(address))
            logger.info('VNC server %s did not come up yet (error: %s). Sleeping for 1s.', address, e)
            time.sleep(1)
        else:
            break

        # Only reached after a failed attempt, where start_timeout is set.
        if time.time() - start_time > self.start_timeout:
            raise error.Error('VNC server {} did not come up within {}s'.format(address, self.start_timeout))

    self.sockets[sock] = ('vnc', address)
94 | # https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API/Writing_WebSocket_servers 95 | # 96 | # The port 10003 is an arbitrary port that we don't actually connect to, but needs to be a valid part 97 | # e.g Host: 127.0.0.1:GARBAGE results in the following error: (invalid port 'GARBAGE' in HTTP Host header '127.0.0.1:GARBAGE') 98 | sock.send(b'GET / HTTP/1.1\r\nHost: 127.0.0.1:10003\r\nUpgrade: WebSocket\r\nConnection:Upgrade\r\nSec-WebSocket-Key: dGhlIHNhbXBsZSBub25jZQ==\r\nSec-WebSocket-Version: 13\r\nauthorization: ' + utils.basic_auth_encode('openai').encode('utf-8') + b'\r\nopenai-observer: true\r\n\r\n') 99 | self.sockets[sock] = ('rewarder', address) 100 | 101 | def run(self): 102 | target = time.time() + self.timeout 103 | while self.sockets: 104 | remaining = target - time.time() 105 | if remaining < 0: 106 | break 107 | ready, _, _ = select.select(self.sockets.keys(), [], [], remaining) 108 | 109 | # Go through the readable sockets 110 | remote_closed = False 111 | for sock in ready: 112 | type, address = self.sockets.pop(sock) 113 | 114 | # Connection was closed; try again. 115 | # 116 | # This is guaranteed not to block. 117 | try: 118 | recv = sock.recv(1) 119 | except socket.error as e: 120 | if e.errno == errno.ECONNRESET: 121 | recv = b'' 122 | else: 123 | raise 124 | 125 | if recv == b'': 126 | logger.info('Remote closed: address=%s', address) 127 | remote_closed = True 128 | if type == 'rewarder': 129 | self._register_rewarder(address) 130 | else: 131 | self._register_vnc(address) 132 | else: 133 | logger.debug('Healthcheck passed for %s %s', type, address) 134 | 135 | sock.close() 136 | 137 | if remote_closed: 138 | sleep = 1 139 | logger.info('At least one sockets was closed by the remote. 
def slither_vnc(space=False, left=False, right=False):
    """Return the space/left/right key events with the requested keys held down."""
    return [spaces.KeyEvent.by_name('space', down=space),
            spaces.KeyEvent.by_name('left', down=left),
            spaces.KeyEvent.by_name('right', down=right)]


def racing_vnc(up=False, left=False, right=False):
    """Return the up/left/right key events with the requested keys held down."""
    return [spaces.KeyEvent.by_name('up', down=up),
            spaces.KeyEvent.by_name('left', down=left),
            spaces.KeyEvent.by_name('right', down=right)]


def platform_vnc(up=False, left=False, right=False, space=False):
    """Return the up/left/right/space key events with the requested keys held down."""
    return [spaces.KeyEvent.by_name('up', down=up),
            spaces.KeyEvent.by_name('left', down=left),
            spaces.KeyEvent.by_name('right', down=right),
            spaces.KeyEvent.by_name('space', down=space)]


class SafeActionSpace(vectorized.Wrapper):
    """
    Recall that every universe environment receives a list of VNC events as action.
    There exist many environments for which the set of relevant actions is much smaller
    and is known. For example, Atari environments have a modest number of keys,
    so this wrapper, when applied to an Atari environment, will reduce its action space.
    Doing so is very convenient for research, since today's RL algorithms rely on random
    exploration, which is hurt by large action spaces. As our algorithms get better
    and we switch to using the raw VNC commands, this wrapper will become less important.

    Environments that are not listed below, and environments without a spec,
    keep the action space of the wrapped environment.
    """
    def __init__(self, env):
        super(SafeActionSpace, self).__init__(env)

        spec = self.spec
        if spec is None:
            # Must be tested first: everything below dereferences the spec.
            return

        if spec.tags.get('runtime') == 'gym-core':
            self.action_space = gym_core_action_space(spec._kwargs['gym_core_id'])
        elif spec.id in ['internet.SlitherIO-v0',
                         'internet.SlitherIOErmiyaEskandaryBot-v0',
                         'internet.SlitherIOEasy-v0']:
            self.action_space = spaces.Hardcoded([slither_vnc(left=True),
                                                  slither_vnc(right=True),
                                                  slither_vnc(space=True),
                                                  slither_vnc(left=True, space=True),
                                                  slither_vnc(right=True, space=True)])
        elif spec.id in ['flashgames.DuskDrive-v0']:
            # TODO: be more systematic
            self.action_space = spaces.Hardcoded([racing_vnc(up=True),
                                                  racing_vnc(left=True),
                                                  racing_vnc(right=True)])
        elif spec.id in ['flashgames.RedBeard-v0']:
            self.action_space = spaces.Hardcoded([platform_vnc(up=True),
                                                  platform_vnc(left=True),
                                                  platform_vnc(right=True),
                                                  platform_vnc(space=True)])
class SoftmaxClickMouse(vectorized.ActionWrapper):
    """
    Creates a Discrete action space of mouse clicks.

    This wrapper divides the active region into cells and creates an action for
    each which clicks in the middle of the cell.

    ``active_region`` is ``(xlow, ylow, xhigh, yhigh)`` with the high bounds
    exclusive; cells are ``discrete_mouse_step`` pixels wide. Cells whose
    click point falls inside any of ``noclick_regions`` get no action.
    """
    def __init__(self, env, active_region=(10, 75 + 50, 10 + 160, 75 + 210), discrete_mouse_step=10, noclick_regions=None):
        super(SoftmaxClickMouse, self).__init__(env)
        if noclick_regions is None:
            # A fresh list per instance instead of one shared mutable default.
            noclick_regions = []
        logger.info('Using SoftmaxClickMouse with action_region={}, noclick_regions={}'.format(active_region, noclick_regions))
        xlow, ylow, xhigh, yhigh = active_region
        half_step = discrete_mouse_step // 2
        self.active_region = active_region
        self.discrete_mouse_step = discrete_mouse_step
        self.noclick_regions = noclick_regions
        # Click points, one per action; the list index is the discrete action.
        self._points = []
        removed = 0
        for x in range(xlow, xhigh, discrete_mouse_step):
            xc = min(x + half_step, xhigh - 1)  # click to center of a cell
            for y in range(ylow, yhigh, discrete_mouse_step):
                yc = min(y + half_step, yhigh - 1)
                if any(self.is_contained((xc, yc), r) for r in noclick_regions):
                    removed += 1
                    continue
                self._points.append((xc, yc))
        logger.info('SoftmaxClickMouse noclick regions removed {} of {} actions'.format(removed, removed + len(self._points)))
        self.action_space = gym.spaces.Discrete(len(self._points))

    def _action(self, action_n):
        """Translate one discrete action per environment into VNC click events."""
        return [self._discrete_to_action(int(i)) for i in action_n]

    def _discrete_to_action(self, i):
        """Return the release/click/release pointer events for action ``i``."""
        xc, yc = self._points[i]
        return [
            spaces.PointerEvent(xc, yc, buttonmask=0),  # release
            spaces.PointerEvent(xc, yc, buttonmask=1),  # click
            spaces.PointerEvent(xc, yc, buttonmask=0),  # release
        ]

    def _reverse_action(self, action):
        """Map a list of VNC events to a one-hot vector over the click actions.

        Returns an all-zero vector when ``action`` holds no left-button press or
        when the press does not correspond to any click action.
        """
        num_actions = len(self._points)
        # find first valid mousedown, ignore everything else
        click_event = next(
            (e for e in action if isinstance(e, spaces.PointerEvent) and e.buttonmask == 1),
            None)
        if click_event is None:
            # no valid mousedowns
            return np.zeros(num_actions)

        index = self._action_to_discrete(click_event)
        if index is None:
            return np.zeros(num_actions)
        # return one-hot vector, expected by demo training code
        # FIXME(jgray): move one-hot translation to separate layer
        return np.eye(num_actions)[index]

    def _action_to_discrete(self, event):
        """Return the index of the action whose cell contains ``event``, or ``None``."""
        assert isinstance(event, spaces.PointerEvent)
        x, y = event.x, event.y
        step = self.discrete_mouse_step
        xlow, ylow, xhigh, yhigh = self.active_region
        # ignore clicks outside of active region; without this check the
        # arithmetic below would fold some of them into an edge cell
        if not (xlow <= x < xhigh and ylow <= y < yhigh):
            return None
        # Same integer arithmetic as __init__, so the computed center matches
        # the stored point for odd step sizes too.
        half_step = step // 2
        xc = min(((x - xlow) // step) * step + xlow + half_step, xhigh - 1)
        yc = min(((y - ylow) // step) * step + ylow + half_step, yhigh - 1)
        try:
            return self._points.index((xc, yc))
        except ValueError:
            # ignore clicks in noclick regions
            return None

    @classmethod
    def is_contained(cls, point, coords):
        """Return True if ``point`` lies inside ``coords`` (bounds inclusive).

        Note the field order that is unpacked from ``coords``:
        ``(x, width, y, height)``.
        """
        px, py = point
        x, width, y, height = coords
        return x <= px <= x + width and y <= py <= y + height
class ErrorBuffer(object):
    """Queue used to hand errors from worker threads/callbacks to a checking thread.

    Errors are stored with ``record()`` (or by calling the buffer, or by using
    it as a context manager) and re-raised by ``check()``.
    """

    def __init__(self):
        self.queue = queue.Queue()

    def __enter__(self):
        pass

    def __exit__(self, type, value, traceback):
        # Record the exception but return None, so it still propagates.
        if value is not None:
            self.record(value)

    def __call__(self, error, wrap=True):
        """Record ``error``; usable directly as an errback."""
        # Forward the caller's choice instead of always wrapping.
        self.record(error, wrap=wrap)

    def record(self, error, wrap=True):
        """Store ``error``; with ``wrap`` it is first converted by ``format_error``."""
        logger.debug('Error in thread %s: %s', threading.current_thread().name, error)
        if wrap:
            error = format_error(error)

        try:
            self.queue.put_nowait(error)
        except queue.Full:
            pass

    def check(self, timeout=None):
        """Raise the oldest recorded error, waiting up to ``timeout`` seconds for one.

        Returns ``None`` if nothing was recorded in time. ``timeout=None``
        means do not wait.
        """
        if timeout is None:
            timeout = 0

        try:
            error = self.queue.get(timeout=timeout)
        except queue.Empty:
            return
        else:
            raise error

    def blocking_check(self, timeout=None):
        """Like ``check``, but with ``timeout=None`` wait until an error arrives."""
        # TODO: get rid of this method
        if timeout is None:
            # Poll in long slices rather than a single wait without timeout.
            while True:
                self.check(timeout=3600)
        else:
            self.check(timeout)


def format_error(e):
    """Convert ``e`` into an ``error.Error`` carrying a formatted traceback.

    ``e`` may be a Twisted ``Failure`` (its wrapped value is used), a string
    (used verbatim) or an exception instance.
    """
    # errback automatically wraps everything in a Twisted Failure
    if isinstance(e, failure.Failure):
        e = e.value

    # string_types also covers ``unicode`` on Python 2.
    if isinstance(e, six.string_types):
        err_string = e
    elif six.PY2:
        err_string = traceback.format_exc(e).rstrip()
    else:
        err_string = ''.join(traceback.format_exception(type(e), e, e.__traceback__)).rstrip()

    if err_string == 'None':
        # Reasonable heuristic for exceptions that were created by hand
        last = traceback.format_stack()[-2]
        err_string = '{}\n {}'.format(e, last)
    # Quick and dirty hack for now.
    err_string = err_string.replace('Connection to the other side was lost in a non-clean fashion', 'Connection to the other side was lost in a non-clean fashion (HINT: this generally actually means we got a connection refused error. Check that the remote is actually running.)')
    return error.Error(err_string)
def queue_get(local_queue):
    """Block until an item is available on ``local_queue`` and return it.

    Waits in slices with a timeout rather than a single wait without one.
    """
    while True:
        try:
            return local_queue.get(timeout=1000)
        except queue.Empty:
            pass


def blockingCallFromThread(f, *a, **kw):
    """Run ``f(*a, **kw)`` in the reactor thread and wait for its result.

    Returns the (possibly deferred) result. If the call failed, raises the
    original exception, or an ``error.Error`` built from the failure text when
    the failure carries stack frames.
    """
    local_queue = queue.Queue()

    def _callFromThread():
        result = defer.maybeDeferred(f, *a, **kw)
        result.addBoth(local_queue.put)
    reactor.callFromThread(_callFromThread)
    result = queue_get(local_queue)
    if isinstance(result, failure.Failure):
        if result.frames:
            e = error.Error(str(result))
        else:
            e = result.value
        raise e
    return result


def repeat_space(space, n):
    """Return a ``Tuple`` space made of ``n`` references to ``space``."""
    return spaces.Tuple([space] * n)


def random_alphanumeric(length=14):
    """Return a random string of ``length`` ASCII letters and digits."""
    chars = []
    while len(chars) < length:
        # b64encode replaces base64.encodestring, removed in Python 3.9.
        entropy = base64.b64encode(uuid.uuid4().bytes).decode('ascii')
        chars.extend(c for c in entropy if c.isalnum())
    return ''.join(chars)[:length]


def best_effort(function, *args, **kwargs):
    """Call ``function(*args, **kwargs)``; on failure log the traceback and return ``None``.

    Only ``Exception`` subclasses are suppressed, so ``SystemExit`` and
    ``KeyboardInterrupt`` still propagate.
    """
    try:
        return function(*args, **kwargs)
    except Exception:
        # Not every callable has __name__ (e.g. functools.partial).
        name = getattr(function, '__name__', repr(function))
        logger.error('Error in %s:', name)
        logger.error(traceback.format_exc())
        return None


def basic_auth_encode(username, password=''):
    """Return the HTTP Basic ``Authorization`` header value for the credentials."""
    credentials = '{}:{}'.format(username, password)
    # b64encode never inserts line breaks, unlike the removed encodestring.
    return 'Basic ' + base64.b64encode(credentials.encode('utf-8')).decode('utf-8')


def basic_auth_decode(header):
    """Parse a ``'Basic ...'`` header value into ``(username, password)``.

    Returns ``None`` when ``header`` does not start with ``'Basic '``. Raises
    ``ValueError`` when the decoded credentials contain no ``:``.
    """
    prefix = 'Basic '
    if not header.startswith(prefix):
        return None
    decoded = base64.b64decode(header[len(prefix):].encode('utf-8')).decode('utf-8')
    # Split on the first ':' only: the password itself may contain colons.
    username, password = decoded.split(':', 1)
    return username, password
def default_password():
    """Return the password stored in the privileged state file, or ``'openai'``.

    The file content is stripped of surrounding whitespace. The fallback is
    used when the file does not exist.
    """
    path = '/usr/local/openai/privileged_state/password'
    if os.path.exists(path):
        with open(path) as f:
            return f.read().strip()
    return 'openai'


logger = logging.getLogger(__name__)


class PeriodicLog(object):
    """Rate-limited logging: at most one message per ``frequency`` seconds.

    The time of the last message is stored on the object passed as ``obj``,
    keyed by ``name``, so each (object, name) pair is throttled independently.
    """

    def _due(self, obj, attr, name, kwargs):
        """Return True and update the bookkeeping if a message may be logged now.

        ``attr`` is the attribute of ``obj`` that holds the ``name -> last log
        time`` dict. ``frequency`` (default 1) and ``delay`` (default 0) are
        popped from ``kwargs``; ``delay`` postpones the first message.
        """
        try:
            info = getattr(obj, attr)
        except AttributeError:
            info = {}
            setattr(obj, attr, info)

        # Would be better to use a frequency=... arg after kwargs, but
        # that isn't py2 compatible.
        frequency = kwargs.pop('frequency', 1)
        delay = kwargs.pop('delay', 0)
        last_log = info.setdefault(name, time.time() - frequency + delay)
        if time.time() - last_log < frequency:
            return False
        info[name] = time.time()
        return True

    def log(self, obj, name, msg, *args, **kwargs):
        """Log ``msg % args`` at INFO level, prefixed with ``[name]``, if due."""
        if self._due(obj, '_periodic_log_info', name, kwargs):
            logger.info('[{}] {}'.format(name, msg), *args)

    def log_debug(self, obj, name, msg, *args, **kwargs):
        """Log ``msg % args`` at DEBUG level, prefixed with ``[name]``, if due."""
        if self._due(obj, '_periodic_log_debug', name, kwargs):
            logger.debug('[{}] {}'.format(name, msg), *args)


_periodic = PeriodicLog()
periodic_log = _periodic.log
periodic_log_debug = _periodic.log_debug


def thread_name():
    """Return the name of the current thread."""
    return threading.current_thread().name


def exit_on_signal():
    """
    Install a signal handler for HUP, INT, and TERM to call exit, allowing clean shutdown.
    When running a universe environment, it's important to shut down the container when the
    agent dies so you should either call this or otherwise arrange to exit on signals.

    Signals that the platform does not define (e.g. SIGHUP) are skipped.
    """
    def shutdown(signum, frame):
        # Logger.warn is deprecated and no longer exists on Python 3.13.
        logger.warning('Received signal %s: exiting', signum)
        sys.exit(128 + signum)

    for signal_name in ('SIGHUP', 'SIGINT', 'SIGTERM'):
        signum = getattr(signal, signal_name, None)
        if signum is not None:
            signal.signal(signum, shutdown)