├── universe
    ├── envs
    │   ├── tests
    │   │   ├── __init__.py
    │   │   └── dusk-drive.png
    │   ├── vnc_core_env
    │   │   ├── __init__.py
    │   │   ├── translator.py
    │   │   └── vnc_core_env.py
    │   ├── vnc_internet.py
    │   ├── vnc_flashgames.py
    │   ├── vnc_gtav.py
    │   ├── vnc_wog.py
    │   ├── __init__.py
    │   ├── vnc_starcraft.py
    │   └── dummy_vnc_env.py
    ├── remotes
    │   ├── compose
    │   │   ├── __init__.py
    │   │   ├── signals.py
    │   │   ├── colors.py
    │   │   ├── utils.py
    │   │   └── progress_stream.py
    │   ├── __init__.py
    │   ├── remote.py
    │   ├── build.py
    │   ├── hardcoded_addresses.py
    │   └── healthcheck.py
    ├── vncdriver
    │   ├── vendor
    │   │   └── __init__.py
    │   ├── error.py
    │   ├── screen
    │   │   ├── base.py
    │   │   ├── __init__.py
    │   │   ├── screen_buffer.py
    │   │   └── pyglet_screen.py
    │   ├── README.md
    │   ├── __init__.py
    │   ├── auth.py
    │   ├── fbs_writer.py
    │   ├── fbs_reader.py
    │   ├── dual_proxy_server.py
    │   ├── vnc_session.py
    │   ├── libvnc_session.py
    │   └── constants.py
    ├── runtimes
    │   ├── .agignore
    │   ├── __init__.py
    │   └── registration.py
    ├── kube
    │   ├── __init__.py
    │   └── discovery.py
    ├── spaces
    │   ├── diagnostics.py
    │   ├── vnc_observation_space.py
    │   ├── __init__.py
    │   ├── hardcoded.py
    │   ├── joystick_event.py
    │   ├── joystick_action_space.py
    │   ├── vnc_event.py
    │   └── vnc_action_space.py
    ├── runtimes.yml
    ├── wrappers
    │   ├── experimental
    │   │   ├── __init__.py
    │   │   ├── random_env.py
    │   │   ├── observation.py
    │   │   └── action_space.py
    │   ├── action_space.py
    │   ├── diagnostics.py
    │   ├── tests
    │   │   ├── test_joint.py
    │   │   └── test_time_limit.py
    │   ├── vision.py
    │   ├── timer.py
    │   ├── render.py
    │   ├── time_limit.py
    │   ├── __init__.py
    │   ├── blocking_reset.py
    │   ├── joint.py
    │   ├── vectorize.py
    │   ├── multiprocessing_env.py
    │   ├── monitoring.py
    │   ├── gym_core_sync.py
    │   ├── gym_core.py
    │   └── throttle.py
    ├── vectorized
    │   ├── __init__.py
    │   ├── tests
    │   │   └── test_monitoring.py
    │   ├── core.py
    │   └── vectorize_filter.py
    ├── error.py
    ├── rewarder
    │   ├── __init__.py
    │   ├── connection_timer.py
    │   ├── merge.py
    │   ├── tests
    │   │   └── test_reward_buffer.py
    │   └── env_status.py
    ├── configuration.py
    ├── twisty.py
    ├── scoreboard
    │   └── __init__.py
    └── utils
    │   ├── display.py
    │   └── __init__.py
├── .dockerignore
├── doc
    ├── dusk-drive.png
    └── env_semantics.rst
├── example
    ├── starter-cluster
    │   └── starter-cluster-requirements.txt
    ├── random-agent
    │   └── random-agent.py
    ├── recorders
    │   ├── vnc_recorder.py
    │   ├── reward_recorder.py
    │   └── botaction_recorder.py
    └── system-diagnostics
    │   └── system_diagnostics_logger.py
├── .gitignore
├── test.dockerfile
├── ISSUE_TEMPLATE
├── tox.ini
├── CODE_OF_CONDUCT.rst
├── Makefile
├── setup.py
├── LICENSE
├── Dockerfile
├── .travis.yml
└── tests
    └── functional
        ├── test_envs.py
        └── test_core_envs_semantics.py


/universe/envs/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/universe/remotes/compose/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/universe/vncdriver/vendor/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/universe/runtimes/.agignore:
--------------------------------------------------------------------------------
1 | flashgames.json
2 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | vncdriver/logs
2 | .git
3 | *.pyc
4 | 


--------------------------------------------------------------------------------
/universe/vncdriver/error.py:
--------------------------------------------------------------------------------
class Error(Exception):
    """Generic exception type defined by the vncdriver package."""
3 | 


--------------------------------------------------------------------------------
/doc/dusk-drive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openai/universe/HEAD/doc/dusk-drive.png


--------------------------------------------------------------------------------
/universe/kube/__init__.py:
--------------------------------------------------------------------------------
1 | from universe.kube.discovery import discover, discover_batches
2 | 


--------------------------------------------------------------------------------
/universe/spaces/diagnostics.py:
--------------------------------------------------------------------------------
class DiagnosticEvent(object):
    """Marker type for diagnostic events; it has no data or methods."""


# Module-level DiagnosticEvent instance exported under the name PeekReward.
PeekReward = DiagnosticEvent()
5 | 


--------------------------------------------------------------------------------
/universe/envs/tests/dusk-drive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openai/universe/HEAD/universe/envs/tests/dusk-drive.png


--------------------------------------------------------------------------------
/universe/runtimes.yml:
--------------------------------------------------------------------------------
1 | flashgames:
2 |   tag: 0.20.28
3 | gym-core:
4 |   tag: 0.20.6
5 | world-of-bits:
6 |   tag: 0.20.0
7 | 


--------------------------------------------------------------------------------
/universe/vncdriver/screen/base.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import time
3 | 
4 | logger = logging.getLogger(__name__)
5 | 
class Screen(object):
    """Placeholder screen type with no attributes or methods of its own."""
8 | 


--------------------------------------------------------------------------------
/example/starter-cluster/starter-cluster-requirements.txt:
--------------------------------------------------------------------------------
1 | boto3>=1.4.2
2 | click>=6.6
3 | docker-py==1.10.6
4 | PyYAML>=3.12
5 | universe>=0.1.0
6 | docker-compose>=1.9.0
7 | 


--------------------------------------------------------------------------------
/universe/envs/vnc_core_env/__init__.py:
--------------------------------------------------------------------------------
1 | from universe.envs.vnc_core_env.vnc_core_env import GymCoreEnv, GymCoreSyncEnv
2 | from universe.envs.vnc_core_env.translator import AtariTranslator, CartPoleTranslator
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | .ropeproject
 3 | *.pyc
 4 | tags
 5 | *.swo
 6 | *.swp
 7 | *.sqlite
 8 | upload/
 9 | venv
10 | dist
11 | *.egg-info
12 | .idea
13 | /logs
14 | /dist
15 | build/
16 | /.gitfiles
17 | /.tox
18 | /.cache
19 | 


--------------------------------------------------------------------------------
/universe/envs/vnc_internet.py:
--------------------------------------------------------------------------------
1 | from universe.envs import vnc_env
2 | 
class InternetEnv(vnc_env.VNCEnv):
    """VNCEnv subclass whose only customization is its probe key."""

    def __init__(self):
        super(InternetEnv, self).__init__()
        # 0x60 is the character code of the backtick (`).
        self._probe_key = ord('`')
7 | 


--------------------------------------------------------------------------------
/universe/envs/vnc_flashgames.py:
--------------------------------------------------------------------------------
1 | from universe.envs import vnc_env
2 | 
class FlashgamesEnv(vnc_env.VNCEnv):
    """VNCEnv subclass that sets the probe key to the backtick character."""

    def __init__(self):
        super(FlashgamesEnv, self).__init__()
        backtick_code = 0x60  # character code of `
        self._probe_key = backtick_code
7 | 


--------------------------------------------------------------------------------
/universe/remotes/__init__.py:
--------------------------------------------------------------------------------
1 | from universe.remotes.hardcoded_addresses import HardcodedAddresses
2 | from universe.remotes.allocator_remote import AllocatorManager
3 | from universe.remotes.docker_remote import DockerManager
4 | from universe.remotes.build import build
5 | 


--------------------------------------------------------------------------------
/universe/spaces/vnc_observation_space.py:
--------------------------------------------------------------------------------
1 | import gym
2 | 
class VNCObservationSpace(gym.Space):
    """Observation space that accepts every value.

    The space is deliberately left wide open for now, since there isn't
    much use-case for this object.
    """

    def contains(self, x):
        """Return True for any `x`; no validation is performed."""
        return True
8 | 


--------------------------------------------------------------------------------
/universe/wrappers/experimental/__init__.py:
--------------------------------------------------------------------------------
1 | from universe.wrappers.experimental.action_space import SafeActionSpace, SoftmaxClickMouse
2 | from universe.wrappers.experimental.observation import CropObservations
3 | from universe.wrappers.experimental.random_env import RandomEnv
4 | 


--------------------------------------------------------------------------------
/universe/vectorized/__init__.py:
--------------------------------------------------------------------------------
1 | from universe.vectorized.core import Env, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
2 | from universe.vectorized.multiprocessing_env import MultiprocessingEnv
3 | from universe.vectorized.vectorize_filter import Filter, VectorizeFilter
4 | 


--------------------------------------------------------------------------------
/universe/error.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | class Error(Exception):
 4 |     pass
 5 | 
 6 | class RPCError(Error):
 7 |     pass
 8 | 
 9 | class ConnectionError(Error):
10 |     pass
11 | 
12 | class TimeoutError(Error):
13 |     pass
14 | 
15 | class AuthenticationError(Error):
16 |     pass
17 | 


--------------------------------------------------------------------------------
/universe/vncdriver/screen/__init__.py:
--------------------------------------------------------------------------------
1 | from universe.vncdriver.screen.base import Screen
2 | from universe.vncdriver.screen.numpy_screen import NumpyScreen
3 | from universe.vncdriver.screen.pyglet_screen import PygletScreen
4 | from universe.vncdriver.screen.screen_buffer import ScreenBuffer
5 | 


--------------------------------------------------------------------------------
/test.dockerfile:
--------------------------------------------------------------------------------
 1 | FROM quay.io/openai/universe
 2 | 
 3 | RUN pip install tox
 4 | 
 5 | # Upload our actual code
 6 | WORKDIR /usr/local/universe/
 7 | COPY . ./
 8 | 
 9 | # Run tox. Keep printing so Travis knows we're alive.
10 | CMD ["bash", "-c", "( while true; do echo '.'; sleep 60; done ) & tox"]
11 | 


--------------------------------------------------------------------------------
/universe/vncdriver/README.md:
--------------------------------------------------------------------------------
1 | # Python VNC driver implementation
2 | 
3 | This Python VNC driver is using an older API, and needs a small amount
4 | of work to once again become a good backend. We haven't bothered with
5 | this since the Go driver is much faster. We would take a pull request
6 | to fix it though!
7 | 


--------------------------------------------------------------------------------
/universe/rewarder/__init__.py:
--------------------------------------------------------------------------------
1 | from universe.rewarder.rewarder_session import RewarderSession
2 | from universe.rewarder.env_status import EnvStatus, compare_ids
3 | from universe.rewarder.merge import merge_n, merge_infos, merge_reward_n, merge_observation_n
4 | from universe.rewarder.reward_buffer import RewardBuffer
5 | 


--------------------------------------------------------------------------------
/universe/vncdriver/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 | 
3 | from universe.vncdriver.vnc_session import VNCSession
4 | from universe.vncdriver.vnc_client import client_factory
5 | from universe.vncdriver.screen import NumpyScreen, PygletScreen
6 | 
7 | logger = logging.getLogger(__name__)
8 | logger.setLevel(logging.INFO)
9 | 


--------------------------------------------------------------------------------
/universe/spaces/__init__.py:
--------------------------------------------------------------------------------
1 | from universe.spaces.hardcoded import Hardcoded
2 | from universe.spaces.vnc_action_space import VNCActionSpace
3 | from universe.spaces.vnc_event import VNCEvent, KeyEvent, PointerEvent
4 | from universe.spaces.vnc_observation_space import VNCObservationSpace
5 | 
6 | from universe.spaces.diagnostics import PeekReward
7 | 


--------------------------------------------------------------------------------
/ISSUE_TEMPLATE:
--------------------------------------------------------------------------------
 1 | 
 2 | (First, please check https://github.com/openai/universe/wiki/Solutions-to-common-problems for solutions to many common problems)
 3 | 
 4 | ### Expected behavior
 5 | 
 6 | ### Actual behavior
 7 | 
 8 | ### Versions
 9 | Please include the result of running
10 | ```
11 | $ uname -a ; python --version; pip show universe gym tensorflow numpy go-vncdriver Pillow
12 | ```
13 | 


--------------------------------------------------------------------------------
/universe/envs/vnc_gtav.py:
--------------------------------------------------------------------------------
 1 | from universe.envs import vnc_env
 2 | from universe.spaces.joystick_action_space import JoystickActionSpace
 3 | 
 4 | 
 5 | class GTAVEnv(vnc_env.VNCEnv):
 6 |     def __init__(self):
 7 |         super(GTAVEnv, self).__init__()
 8 |         self.action_space = JoystickActionSpace(axis_x=True, axis_z=True)
 9 |         self._send_actions_over_websockets = True
10 |         self._skip_network_calibration = True
11 | 
12 | 


--------------------------------------------------------------------------------
/universe/spaces/hardcoded.py:
--------------------------------------------------------------------------------
 1 | from gym.spaces import prng
 2 | 
 3 | class Hardcoded(object):
 4 |     def __init__(self, actions):
 5 |         self.actions = actions
 6 | 
 7 |     def contains(self, action):
 8 |         return action in self.actions
 9 | 
10 |     def sample(self):
11 |         i = prng.np_random.randint(len(self.actions))
12 |         return self.actions[i]
13 | 
14 |     def __getitem__(self, i):
15 |         return self.actions[i]
16 | 


--------------------------------------------------------------------------------
/universe/configuration.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from gym import configuration
 3 | 
 4 | universe_logger = logging.getLogger('universe')
 5 | universe_logger.setLevel(logging.INFO)
 6 | 
 7 | extra_logger = logging.getLogger('universe.extra')
 8 | extra_logger.setLevel(logging.INFO)
 9 | 
10 | if hasattr(configuration, '_extra_loggers'):
11 |     configuration._extra_loggers.append(universe_logger)
12 |     configuration._extra_loggers.append(extra_logger)
13 | 


--------------------------------------------------------------------------------
/universe/envs/vnc_wog.py:
--------------------------------------------------------------------------------
 1 | from universe.envs import vnc_env
 2 | from universe.spaces import VNCActionSpace
 3 | 
 4 | 
 5 | class WorldOfGooEnv(vnc_env.VNCEnv):
 6 |     def __init__(self):
 7 |         super(WorldOfGooEnv, self).__init__()
 8 |         # TODO: set action space screen shape to match
 9 |         # HACK: empty keys list fails for some weird reason, give it an 'a'
10 |         self.action_space = VNCActionSpace(keys=['a'], buttonmasks=[1])
11 | 


--------------------------------------------------------------------------------
/universe/envs/__init__.py:
--------------------------------------------------------------------------------
 1 | import universe.envs.vnc_env
 2 | from universe.envs.vnc_env import VNCEnv
 3 | from universe.envs.vnc_core_env import GymCoreEnv, GymCoreSyncEnv
 4 | from universe.envs.vnc_flashgames import FlashgamesEnv
 5 | from universe.envs.vnc_internet import InternetEnv
 6 | from universe.envs.vnc_starcraft import StarCraftEnv
 7 | from universe.envs.vnc_gtav import GTAVEnv
 8 | from universe.envs.vnc_wog import WorldOfGooEnv
 9 | from universe.envs.dummy_vnc_env import DummyVNCEnv
10 | 


--------------------------------------------------------------------------------
/universe/remotes/compose/signals.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from __future__ import unicode_literals
 3 | 
 4 | import signal
 5 | 
 6 | 
 7 | class ShutdownException(Exception):
 8 |     pass
 9 | 
10 | 
def shutdown(signal, frame):
    """Signal handler that unconditionally raises ShutdownException.

    Both arguments are ignored; they exist only to match the two-argument
    handler signature.
    """
    raise ShutdownException
13 | 
14 | 
def set_signal_handler(handler):
    """Install `handler` for SIGINT first and then for SIGTERM."""
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, handler)
18 | 
19 | 
def set_signal_handler_to_shutdown():
    """Install `shutdown` as the handler via `set_signal_handler`."""
    set_signal_handler(handler=shutdown)
22 | 


--------------------------------------------------------------------------------
/universe/remotes/remote.py:
--------------------------------------------------------------------------------
 1 | class Remote(object):
 2 |     def __init__(self, handle, vnc_address, vnc_password, rewarder_address, rewarder_password, name=None):
 3 |         self.name = name
 4 |         self.handle = handle
 5 |         self.vnc_address = vnc_address
 6 |         self.vnc_password = vnc_password
 7 |         self.rewarder_address = rewarder_address
 8 |         self.rewarder_password = rewarder_password
 9 | 
10 |     def __str__(self):
11 |         return 'Remote<{}:{}>'.format(self.handle, self.name)
12 | 
13 |     def __repr__(self):
14 |         return str(self)
15 | 


--------------------------------------------------------------------------------
/universe/wrappers/action_space.py:
--------------------------------------------------------------------------------
 1 | class SoftmaxClickMouse():
 2 |     def init(self):
 3 |         raise DeprecationWarning('DEPRECATION WARNING: wrappers.SoftmaxClickMouse has been moved to wrappers.experimental.action_space.SoftmaxClickMouse as of 2017-02-08.')
 4 | 
 5 | 
 6 | class SafeActionSpace():
 7 |     def init(self):
 8 |         raise DeprecationWarning('DEPRECATION WARNING: wrappers.SafeActionSpace has been moved to '
 9 |                      'wrappers.experimental.action_space.SafeActionSpace as of 2017-01-07. '
10 |                      'Using legacy wrappers.SafeActionSpace will soon be removed')
11 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | # Tox (http://tox.testrun.org/) is a tool for running tests
 2 | # in multiple virtualenvs. This configuration file will run the
 3 | # test suite on all supported python versions. To use it, "pip install tox"
 4 | # and then run "tox" from this directory.
 5 | 
 6 | [tox]
 7 | envlist = py27, py35
 8 | skipsdist=True
 9 | 
10 | [testenv]
11 | passenv=DISPLAY DOCKER_USERNAME DOCKER_PASSWORD FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES TRAVIS*
12 | deps =
13 |     pytest
14 |     gym[atari]<0.9
15 |     docker-py==1.10.3
16 |     Pillow
17 |     autobahn
18 |     twisted
19 |     ujson
20 |     boto
21 | commands =
22 |     pip install -e /usr/local/universe
23 |     pytest {posargs}
24 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.rst:
--------------------------------------------------------------------------------
 1 | OpenAI is dedicated to providing a harassment-free experience for
 2 | everyone, regardless of gender, gender identity and expression, sexual
 3 | orientation, disability, physical appearance, body size, age, race, or
 4 | religion. We do not tolerate harassment of participants in any form.
 5 | 
 6 | This code of conduct applies to all OpenAI spaces both online and
 7 | off. Anyone who violates this code of conduct may be sanctioned or
 8 | expelled from these spaces at the discretion of the OpenAI team.
 9 | 
10 | We may add additional rules over time, which will be made clearly
11 | available to participants. Participants are responsible for knowing
12 | and abiding by these rules.
13 | 


--------------------------------------------------------------------------------
/universe/wrappers/diagnostics.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import six
 3 | from universe import pyprofile, vectorized
 4 | 
 5 | logger = logging.getLogger(__name__)
 6 | 
 7 | # Not used in core; but used in play_flashgames
 8 | class Diagnostics(vectorized.Wrapper):
 9 | 
10 |     def _step(self, action_n):
11 |         observation_n, reward_n, done_n, info = self.env.step(action_n)
12 |         # We want this to be above Mask, so we know whether or not a
13 |         # particular index is resetting.
14 |         if self.unwrapped.diagnostics:
15 |             with pyprofile.push('vnc_env.diagnostics.add_metadata'):
16 |                 self.unwrapped.diagnostics.add_metadata(observation_n, info['n'])
17 |         return observation_n, reward_n, done_n, info
18 | 


--------------------------------------------------------------------------------
/universe/wrappers/tests/test_joint.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | import universe
 3 | from universe import wrappers
 4 | 
 5 | def test_joint():
 6 |     env1 = gym.make('test.DummyVNCEnv-v0')
 7 |     env2 = gym.make('test.DummyVNCEnv-v0')
 8 |     env1.configure(_n=3)
 9 |     env2.configure(_n=3)
10 |     for reward_buffer in [env1._reward_buffers[0], env2._reward_buffers[0]]:
11 |         reward_buffer.set_env_info('running', 'test.DummyVNCEnv-v0', '1', 60)
12 |         reward_buffer.reset('1')
13 |         reward_buffer.push('1', 10, False, {})
14 | 
15 |     env = wrappers.Joint([env1, env2])
16 |     assert env.n == 6
17 |     observation_n = env.reset()
18 |     assert observation_n == [None] * 6
19 | 
20 |     observation_n, reward_n, done_n, info = env.step([[] for _ in range(env.n)])
21 |     assert reward_n == [10.0, 0.0, 0.0, 10.0, 0.0, 0.0]
22 |     assert done_n == [False] * 6
23 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | upload:
 2 | 	rm -rf dist
 3 | 	python setup.py sdist
 4 | 	twine upload dist/*
 5 | 
 6 | test:
 7 | 	find . -name '*.pyc' -delete
 8 | 	docker build -f test.dockerfile -t quay.io/openai/universe:test .
 9 | 	docker run -v /usr/bin/docker:/usr/bin/docker -v /root/.docker:/root/.docker -v /var/run/docker.sock:/var/run/docker.sock --net=host quay.io/openai/universe:test
10 | 
11 | build:
12 | 	find . -name '*.pyc' -delete
13 | 	docker build -t quay.io/openai/universe .
14 | 	docker build -f test.dockerfile -t quay.io/openai/universe:test .
15 | 
16 | push:
17 | 	find . -name '*.pyc' -delete
18 | 	docker build -t quay.io/openai/universe .
19 | 	docker build -f test.dockerfile -t quay.io/openai/universe:test .
20 | 
21 | 	docker push quay.io/openai/universe
22 | 	docker push quay.io/openai/universe:test
23 | 
24 | test-push:
25 | 	docker build -f test.dockerfile -t quay.io/openai/universe:test .
26 | 	docker push quay.io/openai/universe:test
27 | 


--------------------------------------------------------------------------------
/universe/wrappers/vision.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from universe import vectorized
 3 | 
 4 | logger = logging.getLogger(__name__)
 5 | 
 6 | class Vision(vectorized.Wrapper):
 7 |     """
 8 | At present, an observation from a vectorized universe environment returns a list of 
 9 | dicts. Each dict contains input data for each modality.  Modalities include 'vision'
10 | and 'text', and it is possible to add other modalities in the future (such as 'audio').
11 | 
12 | The Vision wrapper extracts the vision modality and discards all others.  This is convenient
13 | when we only care about the visual input.
14 | """
15 | 
16 |     def _reset(self):
17 |         observation_n = self.env.reset()
18 |         return [ob['vision'] if ob is not None else ob for ob in observation_n]
19 | 
20 |     def _step(self, action_n):
21 |         observation_n, reward_n, done_n, info_n = self.env.step(action_n)
22 |         observation_n = [ob['vision'] if ob is not None else ob for ob in observation_n]
23 |         return observation_n, reward_n, done_n, info_n
24 | 


--------------------------------------------------------------------------------
/universe/wrappers/experimental/random_env.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from universe import vectorized
 3 | 
 4 | logger = logging.getLogger(__name__)
 5 | 
 6 | class RandomEnv(vectorized.Wrapper):
 7 |     '''
 8 |     Randomly sample from a list of env_ids between episodes.
 9 | 
10 |     Passes a list of env_ids to configure. When done=True, calls env.reset()
11 |     to sample from the list.
12 |     '''
13 |     def __init__(self, env, env_ids):
14 |         super(RandomEnv, self).__init__(env)
15 |         self.env_ids = env_ids
16 | 
17 |     def configure(self, **kwargs):
18 |         super(RandomEnv, self).configure(sample_env_ids=self.env_ids, **kwargs)
19 | 
20 |     def _reset(self):
21 |         observation_n = self.env.reset()
22 |         return [ob['vision'] if ob is not None else ob for ob in observation_n]
23 | 
24 |     def _step(self, action_n):
25 |         assert self.n == 1
26 |         observation, reward, done, info = self.env.step(action_n)
27 |         if any(done):
28 |             self.env.reset()
29 |         return observation, reward, done, info
30 | 


--------------------------------------------------------------------------------
/universe/wrappers/timer.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import time
 3 | from universe import pyprofile, vectorized
 4 | 
 5 | logger = logging.getLogger(__name__)
 6 | 
 7 | class Timer(vectorized.Wrapper):
 8 |     """
 9 | Calculate how much time was spent actually doing work.  Display result
10 | via pyprofile.
11 | """
12 | 
13 |     def configure(self, **kwargs):
14 |         self.env.configure(**kwargs)
15 | 
16 |     def _reset(self):
17 |         with pyprofile.push('vnc_env.Timer.reset'):
18 |             return self.env.reset()
19 | 
20 |     def _step(self, action_n):
21 |         start = time.time()
22 |         with pyprofile.push('vnc_env.Timer.step'):
23 |             observation_n, reward_n, done_n, info = self.env.step(action_n)
24 | 
25 |         # Calculate how much time was spent actually doing work
26 |         sleep = info.get('stats.throttle.sleep')
27 |         if sleep is None or sleep < 0:
28 |             sleep = 0
29 |         pyprofile.timing('vnc_env.Timer.step.excluding_sleep', time.time() - start - sleep)
30 |         return observation_n, reward_n, done_n, info
31 | 


--------------------------------------------------------------------------------
/universe/remotes/compose/colors.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from __future__ import unicode_literals
 3 | NAMES = [
 4 |     'grey',
 5 |     'red',
 6 |     'green',
 7 |     'yellow',
 8 |     'blue',
 9 |     'magenta',
10 |     'cyan',
11 |     'white'
12 | ]
13 | 
14 | 
15 | def get_pairs():
16 |     for i, name in enumerate(NAMES):
17 |         yield(name, str(30 + i))
18 |         yield('intense_' + name, str(30 + i) + ';1')
19 | 
20 | 
def ansi(code):
    """Return the escape sequence ESC '[' <code> 'm' for `code`."""
    sequence = '\033[{0}m'
    return sequence.format(code)


def ansi_color(code, s):
    """Wrap `s` in the escape sequence for `code`, followed by code 0."""
    opening = ansi(code)
    closing = ansi(0)
    return '{0}{1}{2}'.format(opening, s, closing)


def make_color_fn(code):
    """Return a one-argument function that colors its input with `code`."""
    def colorize(s):
        return ansi_color(code, s)
    return colorize
31 | 
32 | 
# Publish one module-level coloring function per (name, code) pair, so
# that e.g. `red` and `intense_red` become attributes of this module.
for (name, code) in get_pairs():
    globals()[name] = make_color_fn(code)


def rainbow():
    """Yield the module's color functions in a fixed twelve-color order.

    The six plain colors come first, followed by their intense variants.
    """
    plain = ['cyan', 'yellow', 'green', 'magenta', 'red', 'blue']
    intense = ['intense_' + color for color in plain]

    for color_name in plain + intense:
        yield globals()[color_name]
44 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(name='universe',
 4 |       version='0.21.5',
 5 |       description="Universe: a software platform for measuring and training an AI's general intelligence across the world's supply of games, websites and other applications.",
 6 |       url='https://github.com/openai/universe',
 7 |       author='OpenAI',
 8 |       author_email='universe@openai.com',
 9 |       packages=[package for package in find_packages()
10 |                 if package.startswith('universe')],
11 |       install_requires=[
12 |           'autobahn>=0.16.0',
13 |           'docker-py==1.10.3',
14 |           'docker-pycreds==0.2.1',
15 |           'fastzbarlight>=0.0.13',
16 |           'go-vncdriver>=0.4.8',
17 |           'gym>=0.8.1',
18 |           'Pillow>=3.3.0',
19 |           'PyYAML>=3.12',
20 |           'six>=1.10.0',
21 |           'twisted>=16.5.0',
22 |           'ujson>=1.35',
23 |       ],
24 |       package_data={'universe': ['runtimes.yml', 'runtimes/flashgames.json']},
25 |       tests_require=['pytest'],
26 |       extras_require={
27 |           'atari': 'gym[atari]',
28 |       }
29 |       )
30 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2016 OpenAI (http://openai.com)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/universe/spaces/joystick_event.py:
--------------------------------------------------------------------------------
 1 | class JoystickEvent(object):
 2 |     pass
 3 | 
 4 | 
 5 | class JoystickAxisEvent(JoystickEvent):
 6 |     def __init__(self, amount):
 7 |         self.amount = float(amount)
 8 | 
 9 |     def __repr__(self):
10 |         return str(type(self)) + '<amount={}>'.format(self.amount)
11 | 
12 |     def __str__(self):
13 |         return repr(self)
14 | 
15 |     def __hash__(self):
16 |         return self.amount.__hash__()
17 | 
18 |     def __eq__(self, other):
19 |         return type(other) == type(self) and \
20 |                other.amount == self.amount
21 | 
22 |     def compile(self):
23 |         return type(self).__name__, self.amount
24 | 
25 | 
26 | class JoystickAxisXEvent(JoystickAxisEvent):
27 |     pass
28 | 
29 | 
30 | class JoystickAxisYEvent(JoystickAxisEvent):
31 |     pass
32 | 
33 | 
34 | class JoystickAxisZEvent(JoystickAxisEvent):
35 |     pass
36 | 
37 | 
38 | class JoystickAxisRxEvent(JoystickAxisEvent):
39 |     pass
40 | 
41 | 
42 | class JoystickAxisRyEvent(JoystickAxisEvent):
43 |     pass
44 | 
45 | 
46 | class JoystickAxisRzEvent(JoystickAxisEvent):
47 |     pass
48 | 
49 | 
50 | class JoystickSlider0Event(JoystickAxisEvent):
51 |     pass
52 | 
53 | 
54 | class JoystickSlider1Event(JoystickAxisEvent):
55 |     pass
56 | 


--------------------------------------------------------------------------------
/universe/vncdriver/auth.py:
--------------------------------------------------------------------------------
 1 | import six
 2 | import uuid
 3 | 
 4 | from universe import utils
 5 | from universe.vncdriver.vendor import pydes
 6 | 
 7 | class RFBDes(pydes.des):
 8 |     def setKey(self, key):
 9 |         key = key.encode('ascii')
10 | 
11 |         newkey = []
12 |         for ki in range(len(key)):
13 |             if six.PY2:
14 |                 bsrc = ord(key[ki])
15 |             else:
16 |                 bsrc = key[ki]
17 | 
18 |             # Reverse the bits
19 |             btgt = 0
20 |             for i in range(8):
21 |                 if bsrc & (1 << i):
22 |                     btgt = btgt | (1 << 7-i)
23 | 
24 |             if six.PY2:
25 |                 newkey.append(chr(btgt))
26 |             else:
27 |                 newkey.append(btgt)
28 | 
29 |         super(RFBDes, self).setKey(newkey)
30 | 
def challenge(length=16):
    """Return `length` random bytes to be used as an authentication challenge.

    The bytes come straight from the operating system's CSPRNG. (A UUID4 is
    not fully random: its version and variant bits are fixed, so building
    the challenge out of ``uuid4().bytes`` loses entropy.)

    Args:
        length: number of bytes to generate; defaults to 16.

    Returns:
        A ``bytes`` object of exactly `length` bytes.
    """
    import os  # local import: keeps this block self-contained

    return os.urandom(length)
38 | 
def challenge_response(challenge, password=None):
    """Encrypt `challenge` with a DES key derived from `password`.

    Args:
        challenge: the challenge bytes to encrypt.
        password: the password string; when None, `utils.default_password()`
            is used.

    Returns:
        Whatever `RFBDes.encrypt` returns for the challenge.
    """
    if password is None:
        password = utils.default_password()
    # VNC authentication uses exactly one 8-byte DES key: the password is
    # truncated to eight characters, or NUL-padded on the right if shorter.
    password = password[:8].ljust(8, '\0')
    des = RFBDes(password)
    return des.encrypt(challenge)
45 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:16.04
 2 | 
 3 | RUN apt-get update \
 4 |     && apt-get install -y libav-tools \
 5 |     python3-numpy \
 6 |     python3-scipy \
 7 |     python3-setuptools \
 8 |     python3-pip \
 9 |     libpq-dev \
10 |     libjpeg-dev \
11 |     curl \
12 |     cmake \
13 |     swig \
14 |     python3-opengl \
15 |     libboost-all-dev \
16 |     libsdl2-dev \
17 |     wget \
18 |     unzip \
19 |     git \
20 |     golang \
21 |     net-tools \
22 |     iptables \
23 |     libvncserver-dev \
24 |     software-properties-common \
25 |     && apt-get clean \
26 |     && rm -rf /var/lib/apt/lists/*
27 | 
28 | RUN ln -sf /usr/bin/pip3 /usr/local/bin/pip \
29 |     && ln -sf /usr/bin/python3 /usr/local/bin/python \
30 |     && pip install -U pip
31 | 
32 | # Install gym
33 | RUN pip install gym[all]
34 | 
# Get the faster VNC driver. The requirement is quoted so the shell does not
# parse ">=0.4.0" as an output redirection (which would drop the version
# constraint and create a stray file named "=0.4.0").
RUN pip install 'go-vncdriver>=0.4.0'
37 | 
38 | # Install pytest (for running test cases)
39 | RUN pip install pytest
40 | 
41 | # Force the container to use the go vnc driver
42 | ENV UNIVERSE_VNCDRIVER='go'
43 | 
44 | WORKDIR /usr/local/universe/
45 | 
46 | # Cachebusting
47 | COPY ./setup.py ./
48 | COPY ./tox.ini ./
49 | 
50 | RUN pip install -e .
51 | 
52 | # Upload our actual code
53 | COPY . ./
54 | 
55 | # Just in case any python cache files were carried over from the source directory, remove them
56 | RUN py3clean .
57 | 


--------------------------------------------------------------------------------
/universe/vectorized/tests/test_monitoring.py:
--------------------------------------------------------------------------------
 1 | import glob
 2 | import os
 3 | 
 4 | import gym.monitoring
 5 | from gym.monitoring.tests import helpers
 6 | from universe import wrappers
 7 | 
 8 | def test_multiprocessing_env_monitoring():
 9 |     with helpers.tempdir() as temp:
10 |         env = wrappers.WrappedMultiprocessingEnv('Pong-v3')
11 |         env.configure(n=2)
12 |         env = wrappers.Monitor(env, temp)
13 |         env.reset()
14 |         for i in range(2):
15 |             env.step([0, 0])
16 |         env.close()
17 |         manifests = glob.glob(os.path.join(temp, '*.video.*'))
18 |         assert len(manifests) == 2, 'There are {} manifests: {}'.format(len(manifests), manifests)
19 | 
20 |         results = gym.monitoring.load_results(temp)
21 |         assert results['env_info']['env_id'] == 'Pong-v3'
22 | 
def test_vnc_monitoring():
    """Monitor the VNC-backed Pong env on two remotes and check the env id."""
    with helpers.tempdir() as workdir:
        vnc_env = gym.make('gym-core.Pong-v3')
        vnc_env.configure(remotes=2)
        monitored = wrappers.Monitor(wrappers.GymCoreAction(vnc_env), workdir)

        monitored.reset()
        for _ in range(2):
            monitored.step([0, 0])
        monitored.close()

        env_info = gym.monitoring.load_results(workdir)['env_info']
        assert env_info['env_id'] == 'gym-core.Pong-v3'
37 | 
38 | if __name__ == '__main__':
39 |     test_multiprocessing_env_monitoring()
40 |     test_vnc_monitoring()
41 | 


--------------------------------------------------------------------------------
/example/random-agent/random-agent.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import argparse
 3 | import logging
 4 | import sys
 5 | 
 6 | import gym
 7 | import universe # register the universe environments
 8 | 
 9 | from universe import wrappers
10 | 
11 | logger = logging.getLogger()
12 | 
def main():
    """Drive flashgames.NeonRace-v0 with random actions, rendering each step."""
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument(
        '-v', '--verbose',
        action='count',
        dest='verbosity',
        default=0,
        help='Set verbosity.',
    )
    args = parser.parse_args()

    verbosity = args.verbosity
    if verbosity >= 1:
        logger.setLevel(logging.DEBUG)
    elif verbosity == 0:
        logger.setLevel(logging.INFO)

    env = gym.make('flashgames.NeonRace-v0')
    env.configure(remotes=1)  # automatically creates a local docker container

    # Restrict the valid random actions. (Try removing this and see
    # what happens when the agent is given full control of the
    # keyboard/mouse.)
    env = wrappers.experimental.SafeActionSpace(env)
    observation_n = env.reset()

    while True:
        # your agent here
        #
        # Try sending this instead of a random action: ('KeyEvent', 'ArrowUp', True)
        action_n = [env.action_space.sample() for _ in observation_n]
        observation_n, reward_n, done_n, info = env.step(action_n)
        env.render()

    # Not reached: the loop above never exits on its own.
    return 0
42 | 
43 | if __name__ == '__main__':
44 |     sys.exit(main())
45 | 


--------------------------------------------------------------------------------
/universe/runtimes/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import yaml
 3 | 
 4 | from universe.runtimes.registration import register_runtime
 5 | 
 6 | with open(os.path.join(os.path.dirname(__file__), '../runtimes.yml')) as f:
 7 |     spec = yaml.load(f)
 8 | 
 9 | # If you have a local repo, do something like
10 | # export OPENAI_DOCKER_REPO=docker.openai.com  (this one only for openai folks)
11 | docker_repo = os.environ.get('OPENAI_DOCKER_REPO', 'quay.io/openai')
12 | 
13 | register_runtime(
14 |     id='gym-core',
15 |     kind='docker',
16 |     image=docker_repo + '/universe.gym-core:{}'.format(spec['gym-core']['tag']),
17 | )
18 | 
19 | register_runtime(
20 |     id='flashgames',
21 |     kind='docker',
22 |     image=docker_repo + '/universe.flashgames:{}'.format(spec['flashgames']['tag']),
23 |     host_config={
24 |         'privileged': True,
25 |         'cap_add': ['SYS_ADMIN'],
26 |         'ipc_mode': 'host',
27 |     },
28 |     default_params={'cpu': 3.9, 'livestream_url': None},
29 |     server_registry_file=os.path.join(os.path.dirname(__file__), 'flashgames.json'),
30 | )
31 | 
32 | register_runtime(
33 |     id='world-of-bits',
34 |     kind='docker',
35 |     image=docker_repo + '/universe.world-of-bits:{}'.format(spec['world-of-bits']['tag']),
36 |     host_config={
37 |         'privileged': True,
38 |         'cap_add': ['SYS_ADMIN'],
39 |         'ipc_mode': 'host'
40 |     })
41 | 
42 | register_runtime(
43 |     id='vnc-windows',
44 |     kind='windows',
45 | )
46 | 
47 | del spec
48 | 


--------------------------------------------------------------------------------
/example/recorders/vnc_recorder.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import argparse
 3 | import logging
 4 | import os
 5 | import re
 6 | import sys
 7 | 
 8 | from universe import utils
 9 | from universe.vncdriver import vnc_proxy_server
10 | from twisted.internet import protocol, reactor
11 | 
12 | logger = logging.getLogger()
13 | 
def main():
    """Serve VNCProxyServer on --listen-address until the reactor stops.

    The factory is given the target VNC address, the log directory and a
    fresh lower-cased random recorder id. Returns 0 once the reactor exits.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument(
        '-v', '--verbose', action='count', dest='verbosity', default=0,
        help='Set verbosity.')
    parser.add_argument(
        '-l', '--listen-address', default='0.0.0.0:5899',
        help='Address to listen on')
    parser.add_argument(
        '-s', '--vnc-address', default='127.0.0.1:5900',
        help='Address of the VNC server to run on.')
    parser.add_argument(
        '-d', '--logfile-dir', default=None,
        help='Base directory to write logs for each connection')
    args = parser.parse_args()

    verbosity = args.verbosity
    if verbosity >= 1:
        logger.setLevel(logging.DEBUG)
    elif verbosity == 0:
        logger.setLevel(logging.INFO)

    proxy_factory = protocol.ServerFactory()
    proxy_factory.protocol = vnc_proxy_server.VNCProxyServer
    proxy_factory.vnc_address = 'tcp:{}'.format(args.vnc_address)
    proxy_factory.logfile_dir = args.logfile_dir
    proxy_factory.recorder_id = utils.random_alphanumeric().lower()

    listen_host, listen_port = args.listen_address.split(':')
    listen_port = int(listen_port)

    logger.info('Listening on %s:%s', listen_host, listen_port)
    reactor.listenTCP(listen_port, proxy_factory, interface=listen_host)
    reactor.run()
    return 0
40 | 
41 | if __name__ == '__main__':
42 |     sys.exit(main())
43 | 


--------------------------------------------------------------------------------
/example/recorders/reward_recorder.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import argparse
 3 | import logging
 4 | import sys
 5 | 
 6 | from autobahn.twisted import websocket
 7 | from universe.rewarder import reward_proxy_server
 8 | from universe.twisty import reactor
 9 | 
10 | logger = logging.getLogger()
11 | 
12 | 
def main():
    """Serve RewardProxyServer over websockets on --listen-address.

    The factory is given the rewarder address and the log directory, and is
    limited to a single connection. Returns 0 once the reactor exits.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-l', '--listen-address', default='0.0.0.0:15898', help='Address to listen on')
    parser.add_argument('-s', '--rewarder-address', default='127.0.0.1:15900', help='Address of the reward server to run on.')
    parser.add_argument('-d', '--logfile-dir', default=None, help='Base directory to write logs for each connection')
    args = parser.parse_args()

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    factory = websocket.WebSocketServerFactory()
    factory.protocol = reward_proxy_server.RewardProxyServer
    factory.rewarder_address = args.rewarder_address
    factory.logfile_dir = args.logfile_dir
    factory.setProtocolOptions(maxConnections=1)  # We only write reward logs to one place, so only allow one connection

    host, port = args.listen_address.split(':')
    port = int(port)
    logger.info('Listening on %s:%s', host, port)
    # Bind to the requested host. Previously the host was parsed and logged
    # but never passed on, so the server listened on every interface no
    # matter what --listen-address said.
    reactor.listenTCP(port, factory, interface=host)
    reactor.run()
    return 0
38 | 
39 | if __name__ == '__main__':
40 |     sys.exit(main())
41 | 


--------------------------------------------------------------------------------
/universe/twisty.py:
--------------------------------------------------------------------------------
 1 | import threading
 2 | from twisted.python.runtime import platform
 3 | 
 4 | # On OSX, we should use kqueue rather than the default select
 5 | # backend. (Proximal issue is that select can only handle a limited
 6 | # number of file descriptors.)
 7 | #
 8 | # Based off twisted.internet.default
 9 | def _get_reactor(platform):
10 |     try:
11 |         if platform.isLinux():
12 |             try:
13 |                 from twisted.internet import epollreactor
14 |                 cls = epollreactor.EPollReactor
15 |             except ImportError:
16 |                 from twisted.internet import pollreactor
17 |                 cls = pollreactor.PollReactor
18 |         elif platform.isMacOSX():
19 |             from twisted.internet import kqreactor
20 |             cls = kqreactor.KQueueReactor
21 |         elif platform.getType() == 'posix' and not platform.isMacOSX():
22 |             from twisted.internet import pollreactor
23 |             cls = pollreactor.PollReactor
24 |         else:
25 |             from twisted.internet import selectreactor
26 |             cls = selectreactor.SelectReactor
27 |     except ImportError:
28 |         from twisted.internet import selectreactor
29 |         cls = selectreactor.SelectReactor
30 |     return cls()
31 | 
class TwistedThread(threading.Thread):
    """Daemon thread that runs the module-level reactor."""

    # Class-wide flag: flipped the first time start_once() launches a thread.
    started = False
    daemon = True

    @classmethod
    def start_once(cls):
        """Create and start the 'Twisted' thread unless that was already done."""
        if not cls.started:
            cls.started = True
            cls(name='Twisted').start()

    def run(self):
        # The reactor is run with installSignalHandlers=False.
        reactor.run(installSignalHandlers=False)
47 | 
48 | reactor = _get_reactor(platform)
49 | start_once = TwistedThread.start_once
50 | 


--------------------------------------------------------------------------------
/universe/remotes/build.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from universe import error
 3 | from universe.remotes.allocator_remote import AllocatorManager
 4 | from universe.remotes.docker_remote import DockerManager
 5 | from universe.remotes.hardcoded_addresses import HardcodedAddresses
 6 | 
 7 | def build(client_id, remotes, runtime=None, start_timeout=None, **kwargs):
 8 |     if isinstance(remotes, int):
 9 |         remotes = str(remotes)
10 |     elif not isinstance(remotes, str):
11 |         raise error.Error('remotes argument must be a string, got {} which is of type {}'.format(remotes, type(remotes)))
12 | 
13 |     if re.search('^\d+$', remotes): # an integer, like -r 20
14 |         n = int(remotes)
15 |         return DockerManager(
16 |             runtime=runtime,
17 |             start_timeout=start_timeout,
18 |             reuse=kwargs.get('reuse', False),
19 |             n=n,
20 |         ), n
21 |     elif remotes.startswith('vnc://'):
22 |         return HardcodedAddresses.build(
23 |             remotes,
24 |             start_timeout=start_timeout)
25 |     elif remotes.startswith('http://') or remotes.startswith('https://'):
26 |         if runtime is None:
27 |             raise error.Error('Must provide a runtime. HINT: try creating your env instance via gym.make("flashgames.DuskDrive-v0")')
28 | 
29 |         manager, n = AllocatorManager.from_remotes(
30 |             client_id,
31 |             remotes,
32 |             runtime_id=runtime.id,
33 |             runtime_tag=runtime.image.split(':')[-1],
34 |             start_timeout=start_timeout,
35 |             api_key=kwargs.get('api_key'),
36 |             use_recorder_ports=kwargs.get('use_recorder_ports', False),
37 |         )
38 |         manager.start()
39 |         return manager, n
40 |     else:
41 |         raise error.Error('Invalid remotes: {!r}. Must be an integer or must start with vnc:// or https://'.format(remotes))
42 | 


--------------------------------------------------------------------------------
/universe/vectorized/core.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | from gym import spaces
 3 | from universe import error
 4 | 
 5 | class Env(gym.Env):
 6 |     """Base class capable of handling vectorized environments.
 7 |     """
 8 |     metadata = {
 9 |         # This key indicates whether an env is vectorized (or, in the case of
10 |         # Wrappers where autovectorize=True, whether they should automatically
11 |         # be wrapped by a Vectorize wrapper.)
12 |         'runtime.vectorized': True,
13 |     }
14 | 
15 |     # Number of remotes. User should set this.
16 |     n = None
17 | 
18 | 
class Wrapper(Env, gym.Wrapper):
    """Replacement for gym.Wrapper when the wrapped env is vectorized
    (or is a vanilla env that should become vectorized).
    """

    # When True, a non-vectorized env passed to __init__ is wrapped in the
    # Vectorize wrapper automatically; when False it is rejected.
    autovectorize = True

    def __init__(self, env):
        super(Wrapper, self).__init__(env)

        already_vectorized = env.metadata.get('runtime.vectorized')
        if not already_vectorized:
            if not self.autovectorize:
                raise error.Error('This wrapper can only wrap vectorized envs (i.e. where env.metadata["runtime.vectorized"] = True), not {}. Set "self.autovectorize = True" to automatically add a Vectorize wrapper.'.format(env))
            # Imported here because of a circular dependency.
            from universe import wrappers
            env = wrappers.Vectorize(env)

        self.env = env

    @property
    def n(self):
        """The wrapped env's `n`."""
        return self.env.n

    def configure(self, **kwargs):
        """Pass all configuration keywords straight to the wrapped env."""
        self.env.configure(**kwargs)
45 | 
class ObservationWrapper(Wrapper, gym.ObservationWrapper):
    """gym.ObservationWrapper combined with the vectorized Wrapper."""


class RewardWrapper(Wrapper, gym.RewardWrapper):
    """gym.RewardWrapper combined with the vectorized Wrapper."""


class ActionWrapper(Wrapper, gym.ActionWrapper):
    """gym.ActionWrapper combined with the vectorized Wrapper."""
54 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "3.5"
 4 | services:
 5 |   - docker
 6 | before_install:
 7 |   - docker build -f test.dockerfile -t quay.io/openai/universe:test .
 8 | script:
 9 |   - docker run -v /usr/bin/docker:/usr/bin/docker -v /root/.docker:/root/.docker -v /var/run/docker.sock:/var/run/docker.sock --net=host quay.io/openai/universe:test
10 | notifications:
11 |   slack:
12 |     secure: HtkwTGU+cQbpQuRaMuC2ZcuaaJfUBEZxSaChkj74lFulHAc6g/Xj1ztzj/roR/kMl3dycYPl5QL5AkxPPD/x8BweOJmgabe9boPbU9+80tpa0ueZnt0q6vX23ZA7EcqIAOwQqHiaklxoCkSflpV2N9GP20yBf5YNneHWsbFc8RDuJmNsg8s+1sZIrT3aOcvAJmu8WrNVclKvnpH/qCtvkK6npXZvdMvGpQPT/uCYOyPcbURqelk7qzNpT0oJmkrutbkT3Hp03NRDEQgS47pTPMC5pklea5zDkyh++ETEMpXU75UgN3CURKhuf/oyq7JorG/lXQaz6HBYbcT9EhPVpTzPZEczk50VAp3RWWcN6NczJJ9rVL0h+bGZmcOlJz9igNl838ziL6nxMFO9W3psXQUoBvEDo+vXPDEOUxeBrtLqUN1vfQmMw7KKiGIimInWigW19WfVQhSt47+xKKmbvBKtQ/w8lCDlwO5h7QbApv6TiaGzxtzdJMAyhNOZE7KxqvtFCJgKL4ZfmVzziLlbdbr582Cc0wxvGLDC341+CqkYVv83oimM8Ks3wHRT/ABoO1uXOSsZniUU/+oU/mzyrhrkGNNSDCwdJ0mVEWRGTYZs26IcBIeYGsLJrv3J9ZgfiyD2Knl4/yVI0IbTs7qAzhBzsXvt9aH7kH7tXYZH9QQ=
13 |   webhooks:
14 |     urls:
15 |       - https://hooks.zapier.com/hooks/catch/1711022/6ztmzh/
16 |       - https://hooks.zapier.com/hooks/catch/1711022/6zhc8p/
17 |     on_success: always
18 |     on_failure: always
19 | after_success:
20 |   - export BRANCH=$(if [ "$TRAVIS_PULL_REQUEST" == "false" ]; then echo $TRAVIS_BRANCH; else echo $TRAVIS_PULL_REQUEST_BRANCH; fi)
21 |   - echo "TRAVIS_BRANCH=$TRAVIS_BRANCH, PR=$PR, BRANCH=$BRANCH"
22 |   - docker login quay.io -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"
23 |   - docker tag quay.io/openai/universe:test quay.io/openai/universe-travis:passed-ci
24 |   - if [ "$BRANCH" == "master" ]; then ( while true; do echo '.'; sleep 60; done ) & docker push quay.io/openai/universe-travis:passed-ci; fi # This repo is used by universe-envs to run integration test. We echo in order to keep travis alive during a slow push
25 | 


--------------------------------------------------------------------------------
/universe/wrappers/experimental/observation.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from universe import vectorized, runtime_spec
 4 | 
 5 | logger = logging.getLogger(__name__)
 6 | logger.setLevel(logging.INFO)
 7 | 
 8 | 
 9 | def CropObservations(env):
10 |     """"
11 |     Crops the visual observations of an environment so that they only contain the game screen.
12 |     Removes anything outside the game that usually belongs to universe (browser borders and so on).
13 |     """
14 |     if env.spec.tags.get('flashgames', False):
15 |         spec = runtime_spec('flashgames').server_registry[env.spec.id]
16 |         return _CropObservations(env, x=18, y=84, height=spec["height"], width=spec["width"])
17 |     elif (env.spec.tags.get('atari', False) and env.spec.tags.get('vnc', False)):
18 |         return _CropObservations(env, height=194, width=160)
19 |     else:
20 |         # if unknown environment (or local atari), do nothing
21 |         return env
22 | 
class _CropObservations(vectorized.ObservationWrapper):
    """Observation wrapper that slices every frame down to a fixed window."""

    def __init__(self, env, height, width, x=0, y=0):
        super(_CropObservations, self).__init__(env)
        # (x, y) is the window's top-left corner; height/width its extent.
        self.x, self.y = x, y
        self.height, self.width = height, width

        # modify observation_space? (if so, how to know depth and channels before we have seen the first frame?)
        # self.observation_space = Box(0, 255, shape=(height, width, 3))

    def _observation(self, observation_n):
        return list(map(self._crop_frame, observation_n))

    def _crop_frame(self, frame):
        """Crop one observation; None is passed through untouched.

        A dict observation has its 'vision' entry replaced in place by the
        cropped view; anything else is sliced directly.
        """
        if frame is None:
            return frame

        window = (
            slice(self.y, self.y + self.height),
            slice(self.x, self.x + self.width),
        )
        if isinstance(frame, dict):
            frame['vision'] = frame['vision'][window]
            return frame
        return frame[window]
44 | 


--------------------------------------------------------------------------------
/universe/vectorized/vectorize_filter.py:
--------------------------------------------------------------------------------
 1 | from universe.vectorized import core
 2 | 
 3 | class Filter(object):
 4 |     def _after_reset(self, observation):
 5 |         return observation
 6 | 
 7 |     def _after_step(self, observation, reward, done, info):
 8 |         return observation, reward, done, info
 9 | 
class VectorizeFilter(core.Wrapper):
    """Applies a single-env Filter to every slot of a vectorized env."""

    autovectorize = False
    metadata = {
        'configure.required': True
    }

    def __init__(self, env, filter_factory, *args, **kwargs):
        super(VectorizeFilter, self).__init__(env)
        self.filter_factory = filter_factory
        # Created on the first reset, one filter per slot.
        self.filter_n = None
        self._args = args
        self._kwargs = kwargs

    def _reset(self):
        if self.filter_n is None:
            self.filter_n = [
                self.filter_factory(*self._args, **self._kwargs)
                for _ in range(self.n)
            ]
        raw_observation_n = self.env.reset()
        return [
            slot_filter._after_reset(raw_observation)
            for slot_filter, raw_observation in zip(self.filter_n, raw_observation_n)
        ]

    def _step(self, action_n):
        raw_observation_n, raw_reward_n, raw_done_n, raw_info = self.env.step(action_n)

        # Copy the top-level info and give the copy a fresh per-slot list.
        info = raw_info.copy()
        info['n'] = []

        observation_n, reward_n, done_n = [], [], []
        per_slot = zip(self.filter_n, raw_observation_n, raw_reward_n,
                       raw_done_n, raw_info['n'])
        for slot_filter, observation, reward, done, slot_info in per_slot:
            filtered = slot_filter._after_step(observation, reward, done, slot_info)
            observation, reward, done, slot_info = filtered
            observation_n.append(observation)
            reward_n.append(reward)
            done_n.append(done)
            info['n'].append(slot_info)
        return observation_n, reward_n, done_n, info

    def __str__(self):
        return '<{}[{}]{}>'.format(type(self).__name__, self.filter_factory, self.env)
50 | 


--------------------------------------------------------------------------------
/universe/wrappers/render.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | from twisted.python.runtime import platform
 4 | from universe import vectorized
 5 | 
 6 | logger = logging.getLogger(__name__)
 7 | 
 8 | class Render(vectorized.Wrapper):
 9 |     metadata = {
10 |         'configure.required': True
11 |     }
12 |     def __init__(self, *args, **kwargs):
13 |         if platform.isLinux() and not os.environ.get('DISPLAY'):
14 |             self.renderable = False
15 |         else:
16 |             self.renderable = True
17 |         self._observation = None
18 |         super(Render, self).__init__(*args, **kwargs)
19 | 
20 |     def configure(self, **kwargs):
21 |         self.env.configure(**kwargs)
22 |         self.metadata = self.metadata.copy()
23 |         modes = self.metadata.setdefault('render.modes', [])
24 |         if 'rgb_array' not in modes:
25 |             modes.append('rgb_array')
26 | 
27 |     def _reset(self):
28 |         observation_n = self.env.reset()
29 |         self._observation = observation_n[0]
30 |         return observation_n
31 | 
32 |     def _step(self, action_n):
33 |         observation_n, reward_n, done_n, info_n = self.env.step(action_n)
34 |         self._observation = observation_n[0]
35 |         return observation_n, reward_n, done_n, info_n
36 | 
37 |     def _render(self, mode='human', *args, **kwargs):
38 |         if not self.renderable and mode == 'human':
39 |             return
40 |         elif self.env is None:
41 |             # Only when this breaks
42 |             return
43 |         elif mode == 'rgb_array':
44 |             if self._observation is not None:
45 |                 observation = self._observation
46 |                 if isinstance(self._observation, dict):
47 |                     observation = observation['vision']
48 |                 return observation
49 |             else:
50 |                 return None
51 |         # Could log, but no need.
52 |         return self.env.render(mode=mode, *args, **kwargs)
53 | 


--------------------------------------------------------------------------------
/universe/vncdriver/fbs_writer.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import struct
 3 | import time
 4 | 
 5 | from gym.utils import atomic_write, closer
 6 | 
 7 | fbs_closer = closer.Closer()
 8 | 
 9 | class FBSWriter(object):
10 |     def __init__(self, path):
11 |         self._closed = False
12 | 
13 |         self.start = None
14 |         self.stop = None
15 | 
16 |         self._id = fbs_closer.register(self)
17 | 
18 |         self.file = open(path, 'wb')
19 |         # custom format: exactly the same as FBS 001.000 except:
20 |         #
21 |         # FBS 001.002
22 |         # {line-of-json}
23 |         # [length-byte, data, timestamp]...
24 |         # \0\0\0\0 {line-of-json}
25 |         self.file.write(b'FBS 001.002\n')
26 | 
27 |     def write(self, data):
28 |         # Format:
29 |         #
30 |         # length
31 |         # data
32 |         # timestamp (4 bytes)
33 | 
34 |         if not data:
35 |             return
36 | 
37 |         if self.start is not None:
38 |             delta = int(1000 * (time.time() - self.start))
39 |         else:
40 |             delta = 0
41 |             self.start = time.time()
42 | 
43 |             # Write metadata header
44 |             self.file.write(json.dumps({'start': self.start}).encode('utf-8'))
45 |             self.file.write(b'\n')
46 | 
47 |         length = struct.pack('!I', len(data))
48 |         self.file.write(length)
49 |         self.file.write(data)
50 | 
51 |         delta = struct.pack('!I', delta)
52 |         self.file.write(delta)
53 | 
54 |     def _write_metadata(self):
55 |         # Write metadata trailer
56 |         null = struct.pack('!I', 0)
57 |         self.file.write(null)
58 |         self.file.write(json.dumps({'stop': self.stop}).encode('utf-8'))
59 |         self.file.write(b'\n')
60 | 
61 |     def close(self):
62 |         if self._closed:
63 |             return
64 |         self._closed = True
65 | 
66 |         fbs_closer.unregister(self._id)
67 |         self.stop = time.time()
68 |         self._write_metadata()
69 |         self.file.close()
70 | 
71 |     def __del__(self):
72 |         self.close()
73 | 


--------------------------------------------------------------------------------
/universe/wrappers/time_limit.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import time
 4 | from universe import pyprofile
 5 | from universe.vectorized import core
 6 | 
 7 | logger = logging.getLogger(__name__)
 8 | 
 9 | DEFAULT_MAX_EPISODE_SECONDS = 20 * 60.  # Default to 20 minutes if there is no explicit limit
10 | 
class UniverseTimeLimit(core.Wrapper):
    """Ends an episode once the env spec's step or wall-clock limit is hit.

    When the spec defines neither limit, DEFAULT_MAX_EPISODE_SECONDS is
    used as the seconds limit.
    """

    def __init__(self, env):
        super(UniverseTimeLimit, self).__init__(env)
        spec = self.env.spec
        self._max_episode_seconds = spec.max_episode_seconds
        self._max_episode_steps = spec.max_episode_steps

        no_explicit_limit = (self._max_episode_seconds is None
                             and self._max_episode_steps is None)
        if no_explicit_limit:
            self._max_episode_seconds = DEFAULT_MAX_EPISODE_SECONDS

        self._elapsed_steps = 0
        self._episode_started_at = None

    @property
    def _elapsed_seconds(self):
        # Wall-clock time since the last reset.
        return time.time() - self._episode_started_at

    def _past_limit(self):
        """Return true if we are past our limit"""
        step_cap = self._max_episode_steps
        if step_cap is not None and step_cap <= self._elapsed_steps:
            logger.debug("Env has passed the step limit defined by TimeLimit.")
            return True

        seconds_cap = self._max_episode_seconds
        if seconds_cap is not None and seconds_cap <= self._elapsed_seconds:
            logger.debug("Env has passed the seconds limit defined by TimeLimit.")
            return True

        return False

    def _step(self, action_n):
        assert self._episode_started_at is not None, "Cannot call env.step() before calling reset()"
        step_result = self.env.step(action_n)
        observation_n, reward_n, done_n, info = step_result
        self._elapsed_steps += 1

        if self._past_limit():
            # Force a reset (its observation is discarded) and report every
            # slot as done.
            self.reset()
            done_n = [True] * self.n

        return observation_n, reward_n, done_n, info

    def _reset(self):
        # Restart both the clock and the step counter for the new episode.
        self._episode_started_at = time.time()
        self._elapsed_steps = 0
        return self.env.reset()
54 | TimeLimit = UniverseTimeLimit
55 | 


--------------------------------------------------------------------------------
/universe/vncdriver/fbs_reader.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import struct
 4 | 
 5 | from universe import error
 6 | 
class InvalidFBSFileError(error.Error):
    """Raised by FBSReader when a file does not begin with the expected FBS version line."""
    pass
 9 | 
class FBSReader(object):
    """Iterator over the records of an ``FBS 001.002`` recording.

    The file starts with the 12-byte version line, followed by one line of
    JSON whose ``start`` key is the base timestamp. Each record is a 4-byte
    big-endian length, that many bytes of data, and a 4-byte big-endian
    millisecond offset. A zero length ends the stream.

    Iterating yields ``(data, timestamp)`` tuples where ``timestamp`` is
    ``start`` plus the record offset converted to seconds.
    """

    def __init__(self, path):
        """Open ``path`` and parse its header.

        Raises:
            InvalidFBSFileError: if the version line is not recognized.
        """
        self.file = open(path, 'rb')
        try:
            version = self.file.read(12)
            if version != b'FBS 001.002\n':
                raise InvalidFBSFileError('Unrecognized FBS version: {}'.format(version))

            header = json.loads(self.file.readline().decode('utf-8'))
            self.start = header['start']
        except Exception:
            # Don't leak the file handle when the file is rejected.
            self.file.close()
            raise

    def __iter__(self):
        return self

    def read_safe(self, size=None):
        """
        We currently close our fbs files by killing them, so sometimes they end
        up with bad data at the end. Close our reader if we expect `size` bytes
        and get fewer.

        This is a hack and should be removed when we cleanly close our
        connections in fbs_writer.

        https://github.com/openai/universe-envs/issues/41
        """
        data = self.file.read(size)
        if len(data) != size:
            # We unexpectedly got to the end of the file
            self.close()
            raise StopIteration
        return data

    def next(self):
        """Python 2 spelling of ``__next__``."""
        return self.__next__()

    def __next__(self):
        """Return the next ``(data, timestamp)`` record or stop iteration."""
        # read_safe either returns exactly 4 bytes or stops the iteration,
        # so no separate empty-read check is needed here.
        (length,) = struct.unpack('!I', self.read_safe(4))

        if length == 0:
            # Reached the end
            self.close()
            raise StopIteration()

        data = self.read_safe(length)
        (timestamp,) = struct.unpack('!I', self.read_safe(4))

        return data, self.start + timestamp/1000.

    def close(self):
        """Close the underlying file."""
        self.file.close()
69 | 


--------------------------------------------------------------------------------
/universe/vncdriver/screen/screen_buffer.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import time
 3 | import threading
 4 | 
 5 | from universe.vncdriver.screen import numpy_screen
 6 | 
 7 | logger = logging.getLogger(__name__)
 8 | 
 9 | class ScreenBuffer(object):
10 |     def __init__(self):
11 |         self.lock = threading.Lock()
12 |         self.uncommitted = []
13 |         self.updates = []
14 | 
15 |     def apply_format(self, attrs):
16 |         self._push({
17 |             'type': 'apply_format',
18 |             'attrs': attrs,
19 |         })
20 | 
21 |     def update_rectangle(self, x, y, width, height, data):
22 |         self._push({
23 |             'type': 'update_rectangle',
24 |             'x': x,
25 |             'y': y,
26 |             'width': width,
27 |             'height': height,
28 |             'data': data,
29 |         })
30 | 
31 |     def copy_rectangle(self, src_x, src_y, x, y, width, height):
32 |         self._push({
33 |             'type': 'copy_rectangle',
34 |             'src_x': src_x,
35 |             'src_y': src_y,
36 |             'x': x,
37 |             'y': y,
38 |             'width': width,
39 |             'height': height,
40 |         })
41 | 
42 |     def fill_rectangle(self, x, y, width, height, color):
43 |         self._push({
44 |             'type': 'fill_rectangle',
45 |             'x': x,
46 |             'y': y,
47 |             'width': width,
48 |             'height': height,
49 |             'color': color,
50 |         })
51 | 
52 |     def framebuffer_update_finish(self):
53 |         with self.lock:
54 |             self.updates += self.uncommitted
55 |             self.uncommitted = []
56 | 
57 |     def _push(self, update):
58 |         """Always call from single thread."""
59 |         self.uncommitted.append(update)
60 | 
61 |     def pop(self):
62 |         with self.lock:
63 |             if self.updates:
64 |                 updates = self.updates
65 |                 self.updates = []
66 |                 return updates
67 |             else:
68 |                 return None
69 | 
70 |     def peek(self):
71 |         with self.lock:
72 |             if self.updates:
73 |                 return self.updates
74 |             else:
75 |                 return None
76 | 


--------------------------------------------------------------------------------
/example/system-diagnostics/system_diagnostics_logger.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import json
 4 | import psutil
 5 | import time
 6 | 
 7 | class DiagnosticsLogger(object):
 8 |     def __init__(self, interval=5):
 9 |         self.interval = interval
10 |         self.last_cpu_times = {}  # pid -> (user, sys)
11 | 
12 |     def run(self):
13 |         while True:
14 |             cpu_times, chrome_reset = self.cpu_times()
15 |             print(json.dumps({
16 |                 'time': time.time(),
17 |                 'cpu_times': cpu_times,
18 |                 'cpu_percent': psutil.cpu_percent(percpu=True),
19 |                 'chrome_reset': chrome_reset,
20 |             }), flush=True)
21 |             self.chrome_reset = False
22 |             time.sleep(self.interval)
23 | 
24 |     def get_chrome_procs(self):
25 |         def is_chrome(proc):
26 |             try:
27 |                 return proc.name() == 'chrome'
28 |             except psutil.ZombieProcess:
29 |                 return False
30 |         return [p for p in psutil.process_iter() if is_chrome(p)]
31 | 
32 |     def cpu_times(self):
33 |         ''' return {pid: {'user': 0.0, 'sys': 0.0}}, chrome_reset '''
34 |         chrome_procs = self.get_chrome_procs()
35 |         new_pids = {p.pid for p in chrome_procs}
36 |         old_pids = {pid for pid in self.last_cpu_times}
37 |         try:
38 |             cpu_times = {p.pid: p.cpu_times() for p in chrome_procs}
39 |         except psutil.NoSuchProcess:
40 |             # Chrome restarted since fetching the new pids above. Better luck next time.
41 |             return {}, True
42 |         if new_pids != old_pids:
43 |             # We don't know when the Chrome procs were restarted, so don't
44 |             # return elapsed time until next run.
45 |             self.last_cpu_times = cpu_times
46 |             return {}, True
47 |         # Same chrome pids as last run: measure the elapsed cpu times
48 |         ordered_old_times = (self.last_cpu_times[p.pid] for p in chrome_procs)
49 |         ordered_new_times = (cpu_times[p.pid] for p in chrome_procs)
50 |         cpu_times_diff = {p.pid: {'user': (t[0] - l[0]) / self.interval, 'sys': (t[1] - l[1]) / self.interval}
51 |                 for (p, t, l) in zip(chrome_procs, ordered_new_times, ordered_old_times)}
52 |         self.last_cpu_times = cpu_times
53 |         return cpu_times_diff, False
54 | 
55 | if __name__ == '__main__':
56 |     DiagnosticsLogger().run()
57 | 
58 | 


--------------------------------------------------------------------------------
/universe/wrappers/__init__.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | import universe.wrappers.experimental
 3 | from universe import envs, spaces
 4 | from universe.wrappers import gym_core_sync
 5 | from universe.wrappers.blocking_reset import BlockingReset
 6 | from universe.wrappers.diagnostics import Diagnostics
 7 | from universe.wrappers.gym_core import GymCoreAction, GymCoreObservation, CropAtari
 8 | from universe.wrappers.joint import Joint
 9 | from universe.wrappers.logger import Logger
10 | from universe.wrappers.monitoring import Monitor
11 | from universe.wrappers.multiprocessing_env import WrappedMultiprocessingEnv, EpisodeID
12 | from universe.wrappers.recording import Recording
13 | from universe.wrappers.render import Render
14 | from universe.wrappers.throttle import Throttle
15 | from universe.wrappers.time_limit import TimeLimit
16 | from universe.wrappers.timer import Timer
17 | from universe.wrappers.vectorize import Vectorize, Unvectorize, WeakUnvectorize
18 | from universe.wrappers.vision import Vision
19 | 
20 | 
def wrap(env):
    """Apply the standard wrapper stack: Throttle innermost, then Render, then Timer."""
    throttled = Throttle(env)
    rendered = Render(throttled)
    return Timer(rendered)
23 | 
def WrappedVNCEnv():
    """Build a ``VNCEnv`` with default arguments and pass it through ``wrap``."""
    vnc_env = envs.VNCEnv()
    return wrap(vnc_env)
26 | 
def WrappedGymCoreEnv(gym_core_id, fps=None, rewarder_observation=False):
    """Build a wrapped ``VNCEnv``, optionally adding ``GymCoreObservation``.

    ``gym_core_id`` is only forwarded to ``GymCoreObservation``; it is not
    stored on the instance because it is retrieved directly from the spec.
    """
    wrapped = wrap(envs.VNCEnv(fps=fps))
    if not rewarder_observation:
        return wrapped
    return GymCoreObservation(wrapped, gym_core_id=gym_core_id)
34 | 
def WrappedGymCoreSyncEnv(gym_core_id, fps=60, rewarder_observation=False):
    """Build a wrapped ``VNCEnv`` inside ``BlockingReset`` and ``GymCoreSync``.

    With ``rewarder_observation`` the result is wrapped in
    ``GymCoreObservation``; otherwise envs whose spec entry point starts
    with ``gym.envs.atari:`` are wrapped in ``CropAtari``.
    """
    spec = gym.spec(gym_core_id)
    blocking = BlockingReset(wrap(envs.VNCEnv(fps=fps)))
    env = gym_core_sync.GymCoreSync(blocking)

    if rewarder_observation:
        return GymCoreObservation(env, gym_core_id=gym_core_id)
    if spec._entry_point.startswith('gym.envs.atari:'):
        return CropAtari(env)
    return env
44 | 
def WrappedFlashgamesEnv():
    """Build a wrapped ``VNCEnv`` that uses the backtick key as its probe key."""
    backtick = spaces.KeyEvent.by_name('`')
    return wrap(envs.VNCEnv(probe_key=backtick.key))
48 | 
def WrappedInternetEnv(*args, **kwargs):
    """Build an ``InternetEnv`` from the given arguments and pass it through ``wrap``."""
    internet_env = envs.InternetEnv(*args, **kwargs)
    return wrap(internet_env)
51 | 
def WrappedStarCraftEnv(*args, **kwargs):
    """Build a ``StarCraftEnv`` from the given arguments and pass it through ``wrap``."""
    starcraft_env = envs.StarCraftEnv(*args, **kwargs)
    return wrap(starcraft_env)
54 | 
def WrappedGTAVEnv(*args, **kwargs):
    """Build a ``GTAVEnv`` from the given arguments and pass it through ``wrap``."""
    gtav_env = envs.GTAVEnv(*args, **kwargs)
    return wrap(gtav_env)
57 | 
def WrappedWorldOfGooEnv(*args, **kwargs):
    """Build a ``WorldOfGooEnv`` from the given arguments and pass it through ``wrap``."""
    goo_env = envs.WorldOfGooEnv(*args, **kwargs)
    return wrap(goo_env)
60 | 


--------------------------------------------------------------------------------
/universe/wrappers/blocking_reset.py:
--------------------------------------------------------------------------------
 1 | from universe import rewarder, spaces, vectorized
 2 | 
 3 | class BlockingReset(vectorized.Wrapper):
 4 |     """
 5 | By default, a reset in universe is not a blocking operation.  This 
 6 | wrapper changes it. 
 7 | """
 8 | 
 9 |     def __init__(self, *args, **kwargs):
10 |         super(BlockingReset, self).__init__(*args, **kwargs)
11 |         self.reward_n = None
12 |         self.done_n = None
13 |         self.info = None
14 | 
15 |     def _reset(self):
16 |         observation_n = self.env.reset()
17 |         self.reward_n = [0] * self.n
18 |         self.done_n = [False] * self.n
19 |         self.info = {'n': [{} for _ in range(self.n)]}
20 | 
21 |         while any(ob is None for ob in observation_n):
22 |             action_n = []
23 |             for done in self.done_n:
24 |                 if done:
25 |                     # No popping of reward/done. Don't want to merge across episode boundaries.
26 |                     action_n.append([spaces.PeekReward])
27 |                 else:
28 |                     action_n.append([])
29 |             new_observation_n, new_reward_n, new_done_n, new_info = self.env.step(action_n)
30 |             rewarder.merge_n(
31 |                 observation_n, self.reward_n, self.done_n, self.info,
32 |                 new_observation_n, new_reward_n, new_done_n, new_info
33 |             )
34 |         return observation_n
35 | 
36 |     def _step(self, action_n):
37 |         observation_n, reward_n, done_n, info = self.env.step(action_n)
38 |         if self.reward_n is not None:
39 |             rewarder.merge_n(
40 |                 observation_n, reward_n, done_n, info,
41 |                 [None] * self.n, self.reward_n, self.done_n, self.info
42 |             )
43 |             self.reward_n = self.done_n = self.info = None
44 | 
45 |         while any(ob is None for ob in observation_n):
46 |             action_n = []
47 |             for done in done_n:
48 |                 if done:
49 |                     # No popping of reward/done. Don't want to merge across episode boundaries.
50 |                     action_n.append([spaces.PeekReward])
51 |                 else:
52 |                     action_n.append([])
53 |             new_observation_n, new_reward_n, new_done_n, new_info = self.env.step(action_n)
54 |             rewarder.merge_n(
55 |                 observation_n, reward_n, done_n, info,
56 |                 new_observation_n, new_reward_n, new_done_n, new_info
57 |             )
58 |         return observation_n, reward_n, done_n, info
59 | 


--------------------------------------------------------------------------------
/universe/vncdriver/dual_proxy_server.py:
--------------------------------------------------------------------------------
 1 | # a proxy server that handles both reward channel and vnc.
 2 | from twisted.python import log
 3 | from autobahn.twisted import websocket
 4 | import logging
 5 | import os
 6 | import time
 7 | import pexpect
 8 | import sys
 9 | import threading
10 | 
11 | from universe.vncdriver.vnc_proxy_server import VNCProxyServer
12 | from universe.rewarder.reward_proxy_server import RewardProxyServer
13 | from universe import utils
14 | 
15 | logger = logging.getLogger(__name__)
16 | 
17 | 
class DualProxyServer(VNCProxyServer):
    """VNC proxy that also starts and stops a reward proxy subprocess."""

    def __init__(self, action_queue=None, error_buffer=None, enable_logging=True):
        self._log_info('DualProxyServer inited')
        # pexpect handle for the reward proxy; created in recv_ClientInit.
        self.reward_proxy = None

        super(DualProxyServer, self).__init__(action_queue, error_buffer, enable_logging)

    def _log_info(self, msg, *args, **kwargs):
        """Log ``msg`` at INFO level with a ``[dual_proxy]`` prefix."""
        logger.info('[dual_proxy] ' + msg, *args, **kwargs)

    def recv_ClientInit(self, block):
        """Start the reward proxy, wait for its banner, then continue the VNC handshake."""
        # start reward proxy.
        self._log_info('Starting reward proxy server')
        self.reward_proxy = pexpect.spawnu(self.factory.reward_proxy_bin,
                                           logfile=sys.stdout,
                                           timeout=None)

        # wait on reward proxy to be up.
        self._log_info('Waiting for reward proxy server')
        # Raw string: the brackets are regex escapes, not string escapes.
        self.reward_proxy.expect(r'\[RewardProxyServer\]')
        # Keep reading the child's output in the background until it exits.
        self.reward_proxy_thread = threading.Thread(target=lambda: self.reward_proxy.expect(pexpect.EOF))
        self.reward_proxy_thread.start()

        self._log_info('Reward proxy server is up %s', self.reward_proxy.before)

        super(DualProxyServer, self).recv_ClientInit(block)

        self.logfile_dir = self.log_manager.logfile_dir

    def close(self):
        """Close the connections, terminate the reward proxy and upload the logs."""
        # end connections.
        super(DualProxyServer, self).close()

        # wait for rewarder to close.
        if self.reward_proxy:
            self.reward_proxy.terminate()

        # upload to s3.
        # probably hacky right now.
        logger.info('log manager = %s', self.log_manager)
        if self.log_manager:
            import subprocess

            # Pass an argument list rather than an interpolated shell line so
            # that the values cannot be reinterpreted by a shell.
            command = [
                '/app/universe/bin/upload_directory.sh',
                'demonstrator_%s' % self.factory.recorder_id,
                str(self.logfile_dir),
                str(self.factory.bucket),
            ]
            try:
                subprocess.call(command)
            except OSError:
                # Uploading is best effort, as it was with os.system.
                logger.exception('Could not run %s', command[0])
63 | 
64 | 


--------------------------------------------------------------------------------
/universe/wrappers/joint.py:
--------------------------------------------------------------------------------
 1 | from multiprocessing import pool
 2 | from universe import error, rewarder, vectorized
 3 | 
 4 | class Joint(vectorized.Wrapper):
 5 |     def __init__(self, env_m):
 6 |         self.env_m = env_m
 7 | 
 8 |         # TODO: generalize this. Doing so requires adding a vectorized
 9 |         # space mode.
10 |         self.action_space = env_m[0].action_space
11 |         self.observation_space = env_m[0].observation_space
12 | 
13 |         self.pool = pool.ThreadPool(min(len(env_m), 5))
14 | 
15 |         self._n = sum(env.n for env in self.env_m)
16 |         self.metadata = self.metadata.copy()
17 |         self.metadata['render.modes'] = self.env_m[0].metadata['render.modes']
18 | 
19 |     @property
20 |     def n(self):
21 |         return self._n
22 | 
23 |     def _close(self):
24 |         if hasattr(self, 'pool'):
25 |             self.pool.close()
26 | 
27 |     def _render(self, mode='human', close=False):
28 |         return self.env_m[0]._render(mode=mode, close=close)
29 | 
30 |     def _reset(self):
31 |         # Keep all env[0] action on the main thread, in case we ever
32 |         # need to render. Otherwise we get segfaults from the
33 |         # go-vncdriver.
34 |         reset_m_async = self.pool.map_async(lambda env: env.reset(), self.env_m[1:])
35 |         reset = self.env_m[0].reset()
36 |         reset_m = [reset] + reset_m_async.get()
37 | 
38 |         observation_n = []
39 |         for observation_m in reset_m:
40 |             observation_n += observation_m
41 |         return observation_n
42 | 
43 |     def _step(self, action_n):
44 |         observation_n = []
45 |         reward_n = []
46 |         done_n = []
47 |         info_n = []
48 |         info = {}
49 | 
50 |         action_m = []
51 |         for env in self.env_m:
52 |             action_m.append(action_n[len(action_m):len(action_m)+env.n])
53 | 
54 |         # Keep all env[0] action on the main thread, in case we ever
55 |         # need to render. Otherwise we get segfaults from the
56 |         # go-vncdriver.
57 |         step_m_async = self.pool.map_async(lambda arg: arg[0].step(arg[1]), zip(self.env_m[1:], action_m[1:]))
58 |         step = self.env_m[0].step(action_m[0])
59 |         step_m = [step] + step_m_async.get()
60 | 
61 |         for observation_m, reward_m, done_m, _info in step_m:
62 |             observation_n += observation_m
63 |             reward_n += reward_m
64 |             done_n += done_m
65 | 
66 |             # copy in any info keys
67 |             rewarder.merge_infos(info, _info)
68 |             info_n += _info['n']
69 | 
70 |         info['n'] = info_n
71 |         return observation_n, reward_n, done_n, info
72 | 


--------------------------------------------------------------------------------
/universe/envs/vnc_starcraft.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | 
 3 | from universe import spaces
 4 | from universe.spaces import vnc_event, VNCActionSpace
 5 | from universe.spaces.vnc_event import KeyEvent, PointerEvent
 6 | from universe.envs import vnc_env
 7 | from universe.vncdriver import constants
 8 | import logging
 9 | 
10 | logger = logging.getLogger()
11 | 
12 | SCREEN_DIM = (640, 480)
13 | 
class StarCraftEnv(vnc_env.VNCEnv):
    """VNC env whose action space is limited to a fixed set of keys on a SCREEN_DIM screen."""

    def __init__(self):
        super(StarCraftEnv, self).__init__()
        map_position_keys = ['f2', 'f3', 'f4']
        other_keys = ['spacebar', 'left', 'up', 'right', 'down']
        self.action_space = VNCActionSpace(
            keys=map_position_keys + other_keys,
            screen_shape=SCREEN_DIM,
        )
        # The safe action space is the very same object as the action space.
        self.safe_action_space = self.action_space
29 | 
30 |     # def _step(self, action_n):
31 |     #     return super(StarCraftEnv, self)._step(
32 |     #         (StarCraftEventFilter.filter(a) for a in action_n))
33 | 
34 | 
35 | # class StarCraftEventFilter(object):
36 | #     """
37 | #     We only allow keyboard inputs used by StarCraft:
38 | #     http://gamingweapons.com/image/steelseries/zboard-starcraft2-keyset/steelseries_zboard_starcraft2_keyset_02.jpg
39 | #     """
40 | #     _x_offset = 5  # Centered
41 | #     _y_offset = 30  # Remove the chrome
42 | 
43 | #     @classmethod
44 | #     def _safe_pointer_event(cls, event):
45 | #         """Returns true if the click is in a place that will not break out of the box"""
46 | #         height = SCREEN_DIM[0]
47 | #         width = SCREEN_DIM[1]
48 | #         margin = 5  # Never allow clicking within 5 pixels of the edge of the screen
49 | 
50 | #         unsafe_locations = [
51 | #             (event.y < cls._y_offset + margin),  # At the top, where menu chrome is
52 | #             (event.y > height + cls._y_offset - margin),  # Too far down
53 | #             (event.x < cls._x_offset + margin),  # Too far left
54 | #             (event.x > width + cls._x_offset - margin),  # Too far right
55 | #             (410 < event.x < 510) and (370 < event.y < 450),  # Where the menu button is
56 | #         ]
57 | #         unsafe = any(unsafe_locations)
58 | #         if unsafe:
59 | #             logger.warning('skipping unsafe pointer event')
60 | #         return not unsafe
61 | 
62 | #     @classmethod
63 | #     def safe_event(cls, event):
64 | #         if isinstance(event, PointerEvent):
65 | #             return cls._safe_pointer_event(event)
66 | 
67 | #     @classmethod
68 | #     def filter(cls, events):
69 | #         return filter(cls.safe_event, events)
70 | 


--------------------------------------------------------------------------------
/universe/wrappers/tests/test_time_limit.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | import time
 3 | import universe
 4 | from gym.envs import register
 5 | from universe import wrappers
 6 | 
# Dummy VNC env whose spec only carries a wall-clock limit (0.1 seconds).
register(
    id='test.SecondsLimitDummyVNCEnv-v0',
    entry_point='universe.envs:DummyVNCEnv',
    max_episode_seconds=0.1,
    tags={
        'vnc': True,
        }
    )

# Dummy VNC env whose spec only carries a step limit (2 steps).
register(
    id='test.StepsLimitDummyVNCEnv-v0',
    entry_point='universe.envs:DummyVNCEnv',
    max_episode_steps=2,
    tags={
        'vnc': True,
        }
    )
24 | 
25 | 
def test_steps_limit_restart():
    """A step-limited env reports done and resets itself when the limit is reached."""
    env = gym.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    assert env._max_episode_seconds is None
    assert env._max_episode_steps == 2

    # Episode has started
    _, _, done, _ = env.step([[]])
    assert done == [False]

    # Limit reached, now we get a done signal and the env resets itself
    _, _, done, _ = env.step([[]])
    assert done == [True]
    assert env._elapsed_steps == 0
43 | 
44 | 
def test_steps_limit_restart_unused_when_not_wrapped():
    """Without the TimeLimit wrapper the spec's step limit is never enforced."""
    env = gym.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env.reset()

    steps_taken = 0
    while steps_taken < 10:
        step_result = env.step([[]])
        _, _, done, info = step_result
        assert done == [False]
        steps_taken += 1
53 | 
54 | 
def test_seconds_limit_restart():
    """A seconds-limited env reports done once the wall-clock limit has elapsed."""
    env = gym.make('test.SecondsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    assert env._max_episode_seconds == 0.1
    assert env._max_episode_steps is None

    # Episode has started
    _, _, done, _ = env.step([[]])
    assert done == [False]

    # Not enough time has passed
    _, _, done, _ = env.step([[]])
    assert done == [False]

    time.sleep(0.2)

    # Limit reached, now we get a done signal and the env resets itself
    _, _, done, _ = env.step([[]])
    assert done == [True]
77 | 
78 | 
def test_default_time_limit():
    """An env without any limit in its spec gets the default seconds limit."""
    # We need an env without a default limit
    register(
        id='test.NoLimitDummyVNCEnv-v0',
        entry_point='universe.envs:DummyVNCEnv',
        tags={
            'vnc': True,
            },
    )

    env = gym.make('test.NoLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    assert env._max_episode_seconds == wrappers.time_limit.DEFAULT_MAX_EPISODE_SECONDS
    assert env._max_episode_steps is None
96 | 


--------------------------------------------------------------------------------
/universe/spaces/joystick_action_space.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | from gym.spaces import Box
 3 | from universe.spaces import joystick_event
 4 | from gym.spaces import prng
 5 | from collections import OrderedDict
 6 | 
 7 | 
 8 | class JoystickActionSpace(gym.Space):
 9 |     """
10 |     Programmable joystick - currently Windows-only => mapped to vJoy
11 |     """
12 |     def __init__(self, axis_x=False, axis_y=False, axis_z=False, axis_rx=False, axis_ry=False, axis_rz=False,
13 |                  slider_0=False, slider_1=False):
14 |         self.event_space_map = OrderedDict()
15 | 
16 |         if axis_x:
17 |             self.axis_x = box_axis()
18 |             self.event_space_map[joystick_event.JoystickAxisXEvent] = self.axis_x
19 |         if axis_y:
20 |             self.axis_y = box_axis()
21 |             self.event_space_map[joystick_event.JoystickAxisYEvent] = self.axis_y
22 |         if axis_z:
23 |             self.axis_z = box_axis()
24 |             self.event_space_map[joystick_event.JoystickAxisZEvent] = self.axis_z
25 |         if axis_rx:
26 |             self.axis_rx = box_axis()
27 |             self.event_space_map[joystick_event.JoystickAxisRxEvent] = self.axis_rx
28 |         if axis_ry:
29 |             self.axis_ry = box_axis()
30 |             self.event_space_map[joystick_event.JoystickAxisRyEvent] = self.axis_ry
31 |         if axis_rz:
32 |             self.axis_rz = box_axis()
33 |             self.event_space_map[joystick_event.JoystickAxisRzEvent] = self.axis_rz
34 |         if slider_0:
35 |             self.slider_0 = box_axis()
36 |             self.event_space_map[joystick_event.JoystickSlider0Event] = self.slider_0
37 |         if slider_1:
38 |             self.slider_1 = box_axis()
39 |             self.event_space_map[joystick_event.JoystickSlider1Event] = self.slider_1
40 |         # TODO: Add buttons (similar to a vnc_event.KeyEvent - but 1..32)
41 |         # TODO: Add POV hats
42 | 
43 |     def contains(self, action):
44 |         if not isinstance(action, list):
45 |             return False
46 |         for a in action:
47 |             if isinstance(a, joystick_event.JoystickAxisEvent):
48 |                 axis = self.event_space_map[a]
49 |                 if not axis.contains(a):
50 |                     return False
51 |         return True
52 | 
53 |     def sample(self):
54 |         event_type_index = prng.np_random.randint(len(self.event_space_map))
55 |         event_type = list(self.event_space_map.keys())[event_type_index]
56 |         if event_type.__bases__[0] == joystick_event.JoystickAxisEvent:
57 |             event = [event_type(self.event_space_map[event_type].sample()[0])]
58 |         else:
59 |             raise JoystickActionSpaceException('Unexpected event type')
60 |         return event
61 | 
62 | 
class JoystickActionSpaceException(Exception):
    """Raised by JoystickActionSpace.sample when it picks an unexpected event type."""
    pass
65 | 
66 | 
def box_axis():
    """Return a new ``Box`` of shape ``(1,)`` bounded by -1.0 and 1.0."""
    low, high = -1.0, 1.0
    return Box(low, high, shape=(1,))
69 | 


--------------------------------------------------------------------------------
/universe/rewarder/connection_timer.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import signal
 4 | import time
 5 | 
 6 | from universe import error
 7 | from universe.twisty import reactor
 8 | from twisted.internet import defer, protocol
 9 | import twisted.internet.error
10 | import logging
11 | 
12 | logger = logging.getLogger(__name__)
13 | extra_logger = logging.getLogger('universe.extra.'+__name__)
14 | 
class ConnectionTimer(protocol.Protocol):
    """Protocol that drops the connection as soon as it has been made."""
    def connectionMade(self):
        # Only the connect itself matters here, so hang up immediately.
        self.transport.loseConnection()
18 | 
def start(endpoint):
    """Connect a ``ConnectionTimer`` to ``endpoint`` and time the connect.

    Returns the result of ``endpoint.connect`` with a callback added that
    yields the seconds elapsed since this function was called.
    """
    began_at = time.time()
    factory = protocol.ClientFactory.forProtocol(ConnectionTimer)
    connecting = endpoint.connect(factory)
    return connecting.addCallback(lambda _: time.time() - began_at)
24 | 
def measure_clock_skew(label, host):
    """Spawn ``ntpdate -q`` against ``host`` and return the ``Clockskew`` deferred.

    The subprocess is killed if it is still running after
    ``UNIVERSE_NTPDATE_TIMEOUT`` seconds (default 20).
    """
    argv = ['ntpdate', '-q', '-p', '8', host]
    command_line = ' '.join(argv)
    extra_logger.info('[%s] Starting network calibration with %s', label, command_line)
    skew = Clockskew(label, argv)
    # TODO: search PATH for this?
    process = reactor.spawnProcess(skew, '/usr/sbin/ntpdate', argv, {})

    timeout_seconds = float(os.environ.get('UNIVERSE_NTPDATE_TIMEOUT', 20))

    def kill_if_still_running():
        # A falsy pid means there is nothing left to kill.
        if not process.pid:
            return
        logger.error('[%s] %s call timed out after %ss; killing the subprocess. This is ok, but you could have more accurate timings by enabling UDP port 123 traffic to your env. (Alternatively, you can try increasing the timeout by setting environment variable UNIVERSE_NTPDATE_TIMEOUT=10.)', label, command_line, timeout_seconds)
        process.signalProcess(signal.SIGKILL)
        process.reapProcess()

    # TODO: make this part of the connection string
    reactor.callLater(timeout_seconds, kill_if_still_running)
    return skew.deferred
42 | 
class Clockskew(protocol.ProcessProtocol):
    """Collect a process's output and fire ``deferred`` with the parsed clock offset.

    ``deferred`` is called back with the offset in seconds as a float, or
    errbacked with ``error.Error`` when the process fails or its output
    contains no offset.
    """

    def __init__(self, label, cmd):
        self.label = label
        self._cmd = cmd

        self.deferred = defer.Deferred()
        # Raw chunks received on stdout / stderr.
        self.out = []
        self.err = []

    def outReceived(self, data):
        self.out.append(data)

    def errReceived(self, data):
        self.err.append(data)

    def processExited(self, reason):
        """Resolve ``deferred`` from the exit reason and the collected output."""
        if isinstance(reason.value, twisted.internet.error.ProcessDone):
            out = b''.join(self.out).decode('utf-8')
            # Raw string so that \d is a regex escape, not a string escape.
            match = re.search(r'offset ([\d.-]+) sec', out)
            if match is not None:
                offset = float(match.group(1))
                self.deferred.callback(offset)
            else:
                # Interpolate the output into the message, as the failure
                # branch below does, instead of passing it as a stray argument.
                self.deferred.errback(error.Error('Could not parse offset: {}'.format(out)))
        else:
            err = b''.join(self.err)
            self.deferred.errback(error.Error('{} failed with status {}: stderr={!r}'.format(self._cmd, reason.value.exitCode, err)))
70 | 
class ConnectionTimerException(Exception):
    """Exception type dedicated to connection-timer failures."""
73 | 


--------------------------------------------------------------------------------
/universe/envs/dummy_vnc_env.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import numpy as np
 3 | 
 4 | from gym.utils import reraise
 5 | 
 6 | from universe import error, rewarder, spaces, utils, vectorized
 7 | from universe.envs import diagnostics
 8 | from universe.remotes import healthcheck
 9 | from universe.runtimes import registration
10 | 
class DummyVNCEnv(vectorized.Env):
    """
    A simple env for unit testing that does nothing, but looks like a VNC env.
    It accepts any actions, and returns black screens.
    It also returns the actions in the observation, so you can test that action wrappers are producing the right answers
    For example, to test that YourActionWrapper converts example_input_action to example_output_action:

    >>> dummy_env = gym.make('test.DummyVNCEnv-v0')
    >>> e = YourActionWrapper(dummy_env)
    >>> e = universe.wrappers.Unvectorize(e)
    >>> observation, reward, done, info = e.step(example_input_action)
    >>> assert observation['action'] == example_output_action

    """
    metadata = {
        'render.modes': ['human'], # we wrap with a Render which can render to rgb_array
        'semantics.async': True,
        'semantics.autoreset': True,
        'video.frames_per_second' : 60,
        'runtime.vectorized': True,
    }

    def __init__(self):
        self._started = False

        self.observation_space = spaces.VNCObservationSpace()
        self.action_space = spaces.VNCActionSpace()

    def configure(self, remotes=None,
                   client_id=None,
                   start_timeout=None, docker_image=None,
                   ignore_clock_skew=False, disable_action_probes=False,
                   vnc_driver=None, vnc_kwargs=None,
                   replace_on_crash=False, allocate_sync=True,
                   observer=False,
                   _n=3,
    ):
        """Accept the keyword arguments listed above and ignore all but ``_n``.

        Only ``_n`` is used: it sets the batch size and the number of reward
        buffers. ``vnc_kwargs`` defaults to None rather than a shared mutable
        dict; it is unused either way.
        """
        self.n = _n
        self._reward_buffers = [rewarder.RewardBuffer('dummy:{}'.format(i)) for i in range(self.n)]
        self._started = True

    def _reset(self):
        # One placeholder observation per sub-environment.
        return [None] * self.n

    def _step(self, action_n):
        """Echo each action back inside an otherwise blank observation."""
        assert self.n == len(action_n), "Expected {} actions but received {}: {}".format(self.n, len(action_n), action_n)

        # NOTE(review): this blank frame is (1024, 768, 3); confirm whether
        # a (height, width, 3) = (768, 1024, 3) layout was intended instead.
        observation_n = [{
            'vision': np.zeros((1024, 768, 3), dtype=np.uint8),
            'text': [],
            'action': action,
        } for action in action_n]

        # Each buffer yields a (reward, done, info) triple.
        popped = [reward_buffer.pop() for reward_buffer in self._reward_buffers]
        reward_n = [reward for reward, _, _ in popped]
        done_n = [done for _, done, _ in popped]
        info_n = [info for _, _, info in popped]
        return observation_n, reward_n, done_n, {'n': info_n}

    def __str__(self):
        return 'DummyVNCEnv'


--------------------------------------------------------------------------------
/universe/spaces/vnc_event.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | from universe import error
 3 | from universe.vncdriver import constants
 4 | 
 5 | class VNCEvent(object):
 6 |     pass
 7 | 
 8 | def keycode(key):
 9 |     if key in constants.KEYMAP:
10 |         return constants.KEYMAP.get(key)
11 |     elif len(key) == 1:
12 |         return ord(key)
13 |     else:
14 |         raise error.Error('Not sure how to translate to keycode: {!r}'.format(key))
15 | 
class KeyEvent(VNCEvent):
    """A single key press or release, identified by its numeric key code.

    Instances are hashable and compare equal when both the key code and the
    direction match.
    """

    # Reverse lookup from key code to a printable name. Built with
    # comprehensions so no loop variables leak into the class namespace.
    # Printable characters are applied last and win over KEYMAP names.
    _keysym_to_name = {value: key for key, value in constants.KEYMAP.items()}
    _keysym_to_name.update({ord(c): c for c in string.printable})

    @classmethod
    def build(cls, keys, down=None):
        """Build a key combination, such as:

        ctrl-t

        Args:
            keys: Dash-separated key names.
            down: True for presses only, a falsy non-None value for releases
                only, None for presses followed by releases in reverse order.

        Returns:
            A list of KeyEvent instances.
        """
        codes = [keycode(key) for key in keys.split('-')]

        events = []
        if down is None or down:
            events.extend(cls(code, down=True) for code in codes)

        if down is None or not down:
            events.extend(cls(code, down=False) for code in reversed(codes))
        return events

    @classmethod
    def by_name(cls, key, down=None):
        """Create an event from a key name.

        Note that ``down`` is coerced with ``bool()``, so the default of None
        produces a key *release*.
        """
        return cls(keycode(key), down=down)

    def __init__(self, key, down=True):
        # TODO: validate key
        self.key = key
        self.down = bool(down)

    def compile(self):
        """Return the event as a ``('KeyEvent', key, down)`` tuple."""
        return 'KeyEvent', self.key, self.down

    def __repr__(self):
        direction = 'down' if self.down else 'up'
        name = self._keysym_to_name.get(self.key)
        if not name:
            name = '0x{:x}'.format(self.key)
        else:
            name = '{} (0x{:x})'.format(name, self.key)
        return 'KeyEvent<key={} direction={}>'.format(name, direction)

    def __str__(self):
        return repr(self)

    def __hash__(self):
        return hash((self.key, self.down))

    def __eq__(self, other):
        return type(other) == type(self) and \
            other.key == self.key and \
            other.down == self.down

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; keep the two consistent.
        return not self.__eq__(other)

    @property
    def key_name(self):
        """Human readable name"""
        return self._keysym_to_name.get(self.key)
83 | 
class PointerEvent(VNCEvent):
    """A pointer position together with the state of the mouse buttons."""

    def __init__(self, x, y, buttonmask=0):
        self.x, self.y = x, y
        self.buttonmask = buttonmask

    def compile(self):
        """Return the event as a ``('PointerEvent', x, y, buttonmask)`` tuple."""
        compiled = ('PointerEvent', self.x, self.y, self.buttonmask)
        return compiled

    def __repr__(self):
        template = 'PointerEvent<x={} y={} buttonmask={}>'
        return template.format(self.x, self.y, self.buttonmask)

    def __str__(self):
        # The string form is the same as the repr.
        return repr(self)
98 | 


--------------------------------------------------------------------------------
/universe/wrappers/vectorize.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | import weakref
 3 | 
 4 | from universe import error
 5 | from universe.vectorized import core
 6 | 
 7 | class Vectorize(gym.Wrapper):
 8 |     """
 9 | Given an unvectorized environment (where, e.g., the output of .step() is an observation
10 | rather than a list of observations), turn it into a vectorized environment with a batch of size
11 | 1.
12 | """
13 | 
14 |     metadata = {'runtime.vectorized': True}
15 | 
16 |     def __init__(self, env):
17 |         super(Vectorize, self).__init__(env)
18 |         assert not env.metadata.get('runtime.vectorized')
19 |         assert self.metadata.get('runtime.vectorized')
20 |         self.n = 1
21 | 
22 |     def _reset(self):
23 |         observation = self.env.reset()
24 |         return [observation]
25 | 
26 |     def _step(self, action):
27 |         observation, reward, done, info = self.env.step(action[0])
28 |         return [observation], [reward], [done], {'n': [info]}
29 | 
30 |     def _seed(self, seed):
31 |         return [self.env.seed(seed[0])]
32 | 
class Unvectorize(core.Wrapper):
    """
Expose a vectorized environment whose batch size is 1 through the plain,
unvectorized interface.
"""
    autovectorize = False
    metadata = {'runtime.vectorized': False}

    def _reset(self):
        batch = self.env.reset()
        assert len(batch) == 1
        return batch[0]

    def _step(self, action):
        # Submit a batch of one and pick the only entry out of each result.
        obs_batch, reward_batch, done_batch, info = self.env.step([action])
        return obs_batch[0], reward_batch[0], done_batch[0], info['n'][0]

    def _seed(self, seed):
        seeds = self.env.seed([seed])
        return seeds[0]
52 | 
class WeakUnvectorize(Unvectorize):
    """Unvectorize variant that only holds a weak reference to the wrapped env.

    The ``env`` attribute is a property backed by ``self._env_ref``; it raises
    ``error.Error`` once the wrapped env has been garbage collected.
    """
    def __init__(self, env, i):
        # Set the weakref before the base initializer runs so the `env`
        # property can already resolve it.
        self._env_ref = weakref.ref(env)
        super(WeakUnvectorize, self).__init__(env)
        # WeakUnvectorize won't get configure called on it
        self.i = i

    def _check_for_duplicate_wrappers(self):
        pass  # Disable this check because we need to wrap vectorized envs in multiple unvectorize wrappers

    @property
    def env(self):
        # Called upon instantiation
        if not hasattr(self, '_env_ref'):
            return

        # Dereference the weakref; None means the target is gone.
        env = self._env_ref()
        if env is None:
            raise error.Error("env has been garbage collected. To keep using WeakUnvectorize, you must keep around a reference to the env object. (HINT: try assigning the env to a variable in your code.)")
        return env

    @env.setter
    def env(self, value):
        # We'll maintain our own weakref, thank you very much.
        # Assignments to `env` are deliberately ignored.
        pass

    def _seed(self, seed):
        # We handle the seeding ourselves in the vectorized Monitor
        return [seed]

    def close(self):
        # Don't want to close through this wrapper
        pass
86 | 


--------------------------------------------------------------------------------
/universe/scoreboard/__init__.py:
--------------------------------------------------------------------------------
 1 | from gym.benchmarks import scoring
 2 | from gym.benchmarks import register_benchmark
 3 | 
# Benchmark over seven Atari tasks, scored by total reward. Every task runs a
# single trial capped at 10M timesteps.
# NOTE(review): six tasks use VNC* env ids while Pong uses 'gym-core.Pong-v3';
# confirm that id is intended.
register_benchmark(
    id='Atari7VNC-v0',
    scorer=scoring.TotalReward(),
    name='AtariVNC',
    description='7 Atari games, with pixel observations (using universe)',
    tasks=[
        {
            "env_id": "VNCBeamRider-v3",
            "trials": 1,
            "max_timesteps": 10000000
        },
        {
            "env_id": "VNCBreakout-v3",
            "trials": 1,
            "max_timesteps": 10000000
        },
        {
            "env_id": "VNCEnduro-v3",
            "trials": 1,
            "max_timesteps": 10000000
        },
        {
            "env_id": "gym-core.Pong-v3",
            "trials": 1,
            "max_timesteps": 10000000
        },
        {
            "env_id": "VNCQbert-v3",
            "trials": 1,
            "max_timesteps": 10000000
        },
        {
            "env_id": "VNCSeaquest-v3",
            "trials": 1,
            "max_timesteps": 10000000
        },
        {
            "env_id": "VNCSpaceInvaders-v3",
            "trials": 1,
            "max_timesteps": 10000000
        }
    ])
46 | 
# Benchmark over seven flash racing tasks, scored by reward per time. Every
# task runs a single trial capped at 5M timesteps.
# NOTE(review): reward_floor / reward_ceiling are presumably per-task
# normalization bounds consumed by the scorer; confirm against gym's scoring.
register_benchmark(
    id='FlashRacing-v0',
    scorer=scoring.RewardPerTime(),
    name='FlashRacing',
    description='7 flash racing games, goal is best score per time',
    tasks=[
        {'env_id': 'flashgames.NeonRace-v0',
         'trials': 1,
         'max_timesteps': 5000000,
         'reward_floor':   175.0,
         'reward_ceiling': 1700.0,
        },
        {'env_id': 'flashgames.CoasterRacer-v0',
         'trials': 1,
         'max_timesteps': 5000000,
         'reward_floor':   17.0,
         'reward_ceiling': 400.0,
        },
        {'env_id': 'flashgames.HeatRushUsa-v0',
         'trials': 1,
         'max_timesteps': 5000000,
         'reward_floor':   150.0,
         'reward_ceiling': 700.0,
        },
        {'env_id': 'flashgames.FormulaRacer-v0',
         'trials': 1,
         'max_timesteps': 5000000,
         'reward_floor':  0.27,
         'reward_ceiling': 1.0,
        },
        {'env_id': 'flashgames.DuskDrive-v0',
         'trials': 1,
         'max_timesteps': 5000000,
         'reward_floor':   5000.0,
         'reward_ceiling': 15000.0,
        },
        {'env_id': 'flashgames.SpacePunkRacer-v0',
         'trials': 1,
         'max_timesteps': 5000000,
         'reward_floor':   0.67,
         'reward_ceiling': 2.25,
        },
        {'env_id': 'flashgames.NeonRace2-v0',
         'trials': 1,
         'max_timesteps': 5000000,
         'reward_floor':   0.0,
         'reward_ceiling': 1200.0,
        }
    ])
96 | 


--------------------------------------------------------------------------------
/universe/remotes/compose/utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from __future__ import unicode_literals
 3 | 
 4 | import codecs
 5 | import hashlib
 6 | import json
 7 | import json.decoder
 8 | 
 9 | import six
10 | 
11 | 
# Shared decoder instance used by json_splitter and json_stream below.
json_decoder = json.JSONDecoder()
13 | 
14 | 
def get_output_stream(stream):
    """Return ``stream`` as is on Python 3, or wrapped in a UTF-8 writer otherwise."""
    return stream if six.PY3 else codecs.getwriter('utf-8')(stream)
19 | 
20 | 
def stream_as_text(stream):
    """Yield every item of ``stream`` as text.

    Items that are already text pass through untouched; anything else is
    decoded as UTF-8, with undecodable bytes replaced.

    This function can be removed once docker-py returns text streams instead
    of byte streams.
    """
    for chunk in stream:
        if isinstance(chunk, six.text_type):
            yield chunk
        else:
            yield chunk.decode('utf-8', 'replace')
32 | 
33 | 
def line_splitter(buffer, separator=u'\n'):
    """Split ``buffer`` just after the first occurrence of ``separator``.

    Args:
        buffer: Text to split.
        separator: Delimiter to look for; may be longer than one character.

    Returns:
        ``(head, rest)`` where ``head`` ends with the separator, or None when
        the separator does not occur in ``buffer``.
    """
    separator = six.text_type(separator)
    index = buffer.find(separator)
    if index == -1:
        return None
    # Cut after the whole separator, not just its first character. An empty
    # separator still consumes one character so callers always make progress.
    end = index + (len(separator) or 1)
    return buffer[:end], buffer[end:]
39 | 
40 | 
def split_buffer(stream, splitter=None, decoder=lambda a: a):
    """Re-chunk a stream of strings along the boundaries found by ``splitter``.

    All input is joined, and each time ``splitter`` returns an
    ``(item, rest)`` pair the item is yielded and splitting continues on the
    rest. ``splitter`` defaults to ``line_splitter``.

    Unlike string.split(), each chunk keeps its trailing separator, except
    possibly the last one: whatever remains once the stream is exhausted is
    passed through ``decoder`` and yielded.
    """
    if not splitter:
        splitter = line_splitter
    pending = six.text_type('')

    for chunk in stream_as_text(stream):
        pending += chunk
        # Drain every complete item currently sitting in the buffer.
        parts = splitter(pending)
        while parts is not None:
            item, pending = parts
            yield item
            parts = splitter(pending)

    if pending:
        yield decoder(pending)
64 | 
65 | 
def json_splitter(buffer):
    """Try to pull one JSON value off the front of ``buffer``.

    Returns ``(value, rest)`` where ``rest`` starts after the value and any
    whitespace that follows it, or None when no complete value can be parsed.
    """
    try:
        parsed, end = json_decoder.raw_decode(buffer)
        rest_start = json.decoder.WHITESPACE.match(buffer, end).end()
    except ValueError:
        return None
    return parsed, buffer[rest_start:]
76 | 
77 | 
def json_stream(stream):
    """Turn a stream of text into a stream of decoded JSON objects.

    Copes with inconsistent buffering: entries may or may not be newline
    delimited.
    """
    return split_buffer(stream, splitter=json_splitter, decoder=json_decoder.decode)
84 | 
85 | 
def json_hash(obj):
    """Return the SHA-256 hex digest of ``obj`` serialized as compact, key-sorted JSON."""
    canonical = json.dumps(obj, sort_keys=True, separators=(',', ':'))
    return hashlib.sha256(canonical.encode('utf8')).hexdigest()
91 | 
92 | 
def microseconds_from_time_nano(time_nano):
    """Return the sub-second part of a nanosecond timestamp, in whole microseconds."""
    nanos_within_second = time_nano % 1000000000
    return int(nanos_within_second / 1000)
95 | 
96 | 
def build_string_dict(source_dict):
    """Return a copy of ``source_dict`` with every value stringified; None becomes ''."""
    return {
        key: str('' if value is None else value)
        for key, value in source_dict.items()
    }
99 | 


--------------------------------------------------------------------------------
/universe/rewarder/merge.py:
--------------------------------------------------------------------------------
 1 | from universe import error
 2 | import six
 3 | 
 4 | def merge_infos(info1, info2):
 5 |     """We often need to aggregate together multiple infos. Most keys can
 6 |     just be clobbered by the new info, but e.g. any keys which contain
 7 |     counts should be added. The merge schema is indicated by the key
 8 |     namespace.
 9 | 
10 |     Namespaces:
11 | 
12 |     - stats.timers: Timing
13 |     - stats.gauges: Gauge values
14 |     - stats.*: Counts of a quantity
15 |     """
16 |     for key, value in six.iteritems(info2):
17 |         if key in info1 and key.startswith('stats'):
18 |             if key.startswith('stats.timers'):
19 |                 # timer
20 |                 info1[key] += value
21 |             elif key.startswith('stats.gauges'):
22 |                 # gauge
23 |                 info1[key] = value
24 |             else:
25 |                 # counter
26 |                 info1[key] += value
27 |         else:
28 |             info1[key] = value
29 | 
def merge_reward_n(accum_reward_n, reward_n):
    """Add each non-None reward onto the accumulator at the same index, in place."""
    for index, reward in enumerate(reward_n):
        if reward is None:
            continue
        accum_reward_n[index] += reward
35 | 
def merge_done_n(accum_done_n, done_n):
    """Copy every truthy done flag into the accumulator, in place."""
    for index, done in enumerate(done_n):
        # Only a finished episode overrides what was accumulated so far.
        if done:
            accum_done_n[index] = done
41 | 
def _merge_observation(accum_observation, observation):
    """
    Combine an accumulated observation with a newer one.

    The older frame is stale, so 'vision' is replaced by the new one. 'text'
    entries are messages sent from the rewarder, so they are concatenated.
    """
    # A None observation means we are currently masking, so the accumulated
    # one probably belongs to the previous episode. A "text" observation from
    # that episode may be lost, which is acceptable.
    if observation is None:
        return None

    # Nothing accumulated yet, so there is nothing to merge.
    if accum_observation is None:
        return observation

    earlier_text = accum_observation.get('text', [])
    accum_observation['vision'] = observation.get('vision')
    accum_observation['text'] = earlier_text + observation.get('text', [])
    return accum_observation
59 | 
def merge_observation_n(accum_observation_n, observation_n):
    """Merge each new observation into the accumulator slot at the same index, in place."""
    for index, accumulated in enumerate(accum_observation_n):
        accum_observation_n[index] = _merge_observation(accumulated, observation_n[index])
64 | 
def merge_n(
        accum_observation_n, accum_reward_n, accum_done_n, accum_info,
        observation_n, reward_n, done_n, info,
):
    """Fold one vectorized step result into the accumulated one, in place."""
    # Observation, reward and done each have their own merge rule.
    merge_observation_n(accum_observation_n, observation_n)
    merge_reward_n(accum_reward_n, reward_n)
    merge_done_n(accum_done_n, done_n)

    # The per-environment infos under 'n' are merged pairwise.
    per_env_infos = accum_info['n']
    for target, update in zip(per_env_infos, info['n']):
        merge_infos(target, update)

    # The top-level merge overwrites 'n', so put the deep-merged list back.
    merge_infos(accum_info, info)
    accum_info['n'] = per_env_infos
82 | 


--------------------------------------------------------------------------------
/universe/wrappers/multiprocessing_env.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import numpy as np
 3 | from universe import vectorized
 4 | from universe.wrappers import render
 5 | 
 6 | logger = logging.getLogger(__name__)
 7 | 
 8 | def WrappedMultiprocessingEnv(env_id):
 9 |     return render.Render(EpisodeID(vectorized.MultiprocessingEnv(env_id)))
10 | 
class RemoveNones(vectorized.Wrapper):
    """Substitute a plausible observation wherever the env returns None.

    The vectorized environment returns None for any index that has already
    exceeded its episode count (not to be confused with the Nones returned by
    resetting environments in the real-time case). For convenience, each such
    slot is filled with the first observation seen at the last reset.
    """
    def __init__(self, env):
        super(RemoveNones, self).__init__(env)
        self.plausible_observation = None

    def _reset(self):
        fresh = self.env.reset()
        # Remember the first slot as the stand-in for later Nones.
        self.plausible_observation = fresh[0]
        return fresh

    def _step(self, action_n):
        observation_n, reward_n, done_n, info = self.env.step(action_n)
        filled = []
        for ob in observation_n:
            filled.append(self.plausible_observation if ob is None else ob)
        return filled, reward_n, done_n, info
31 | 
class EpisodeID(vectorized.Wrapper):
    """
For each episode, return its id, and also return the total number of contiguous
episodes that are now done.

Slot ``i`` starts with episode id ``i`` and advances by ``self.n`` each time an
episode in that slot finishes. ``done_to`` is the highest id such that every
episode id up to and including it has completed.
"""
    metadata = {
        'configure.required': True
    }

    def configure(self, episode_limit=None, **kwargs):
        """Configure the wrapped env, then reset the episode bookkeeping.

        Args:
            episode_limit: Optional bound on episode ids; once a slot's next
                id exceeds it, that slot is masked.
            **kwargs: Forwarded to the wrapped env's ``configure``.
        """
        self.env.configure(**kwargs)
        self.episode_limit = episode_limit
        self._clear_state()

    def _clear_state(self):
        self.done_to = -1
        # Finished episode ids not yet contiguous with done_to.
        self.extra_done = set()
        self.episode_ids = list(range(self.n))

    def _set_done_to(self):
        # Advance done_to across every finished id that is now contiguous.
        while True:
            next_done_to = self.done_to + 1
            if next_done_to in self.extra_done:
                self.done_to = next_done_to
                self.extra_done.remove(next_done_to)
            else:
                break

    def _reset(self):
        self._clear_state()
        return self.env.reset()

    def _step(self, action_n):
        observation_n, reward_n, done_n, info = self.env.step(action_n)
        # Pass along ID of potentially-done episode
        for i, info_i in enumerate(info['n']):
            info_i['vectorized.episode_id'] = self.episode_ids[i]

        done_i = np.argwhere(done_n).reshape(-1)
        if len(done_i):
            for i in done_i:
                self.extra_done.add(self.episode_ids[i])
                # Episode completed, so we bump its value
                self.episode_ids[i] += self.n
                # NOTE(review): ids start at 0 and the comparison is strict,
                # so an episode whose id equals episode_limit still runs;
                # confirm that is intended.
                if self.episode_limit is not None and self.episode_ids[i] > self.episode_limit:
                    logger.debug('Masking: index=%s episode_id=%s', i, self.episode_ids[i])
                    self.env.mask(i)
            self._set_done_to()

        # Pass along the number of contiguous episodes that are now done
        info['vectorized.done_to'] = self.done_to
        return observation_n, reward_n, done_n, info
83 | 


--------------------------------------------------------------------------------
/tests/functional/test_envs.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import pytest
 4 | import re
 5 | 
 6 | import gym
 7 | from universe import wrappers
 8 | from universe.runtimes import registration
 9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | # Choose a sample from each category
13 | # TODO: Add more comprehensive test that runs all envs
# Env ids fed to test_smoke; commented-out entries are currently disabled.
test_envs = [
    # 'gym-core.PongShortSync-v3',
    # 'gym-core.CartPoleLowDSync-v0',
    'flashgames.DuskDrive-v0',
    'internet.SlitherIO-v0',
    # 'wob.DragBox-v0',
]
21 | 
@pytest.mark.parametrize('env_id', test_envs)
def test_smoke(env_id):
    """Smoke test: the env starts without errors and yields rewards and observations."""
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    env = gym.make(env_id)
    needs_configure = env.metadata.get('configure.required', False)
    # Used to test universe-envs in CI
    force_latest = os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES')
    if needs_configure and force_latest:
        configure_with_latest_docker_runtime_tag(env)
    elif needs_configure:
        env.configure(remotes=1)

    unvectorized = wrappers.Unvectorize(env)
    unvectorized.reset()
    # Check a rollout
    _rollout(unvectorized, timestep_limit=60*30)
39 | 
def _rollout(env, timestep_limit=None):
    """
    Test that a rollout follows our desired format. Includes the following checks:

    1. The environment resets and provides an observation within our timestep_limit
    2. Done signals map to the following:

        done=True => Episode over (sent once at end of episode)
        done=None => Resetting, agent takes no actions until done=False again
        done=False => Episode is running, agent should take actions

    Args:
        env: Unvectorized environment; stepped with a no-op action (``[]``).
        timestep_limit: Maximum number of steps to wait for the env to start
            running; None waits indefinitely.

    Raises:
        AssertionError: If a check fails or the env is still resetting after
            ``timestep_limit`` steps.
    """
    count = 0
    episode_state = 'resetting'

    while True:
        obs, reward, done, info = env.step([])  # Step with noop action
        count += 1

        if episode_state == 'resetting':
            if done is None:  # Still resetting
                assert obs is None
            elif done is False:
                episode_state = 'running'

        if episode_state == 'running':
            assert done is False
            assert isinstance(reward, float)
            # TODO: Remove this None check after we fix done=None semantics
            if obs is not None:
                assert obs['vision'].shape == (768, 1024, 3)
            break

        # Still resetting. Enforce the limit on every step, including those
        # reporting done=None, so a stuck env cannot hang the test forever.
        if timestep_limit is not None and count >= timestep_limit:
            raise AssertionError("Failed to finish resetting in timestep limit")
81 | 
def configure_with_latest_docker_runtime_tag(env):
    """Configure ``env`` with one remote, using its runtime image retagged as ``:latest``."""
    runtime_name = env.spec.tags['runtime']
    pinned_image = registration.runtime_spec(runtime_name).image
    # Replace everything from the first ':' onwards with the latest tag.
    latest_image = re.sub(r':.*', ':latest', pinned_image)
    logger.info("Using latest image: {}".format(latest_image))
    env.configure(remotes=1, docker_image=latest_image)
87 | 


--------------------------------------------------------------------------------
/universe/spaces/vnc_action_space.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | import string
 3 | 
 4 | from gym.spaces import prng
 5 | 
 6 | from universe.vncdriver import constants
 7 | from universe.spaces import vnc_event
 8 | 
 9 | class VNCActionSpace(gym.Space):
10 |     """The space of VNC actions.
11 | 
12 |     You can submit a list of KeyEvents or PointerEvents. KeyEvents
13 |     correspond to pressing or releasing a key. PointerEvents correspond
14 |     to moving to a specific pixel, and setting the mouse buttons to some state
15 |     (buttonmask is a bitmap corresponding to which buttons are down).
16 | 
17 |     Note that key releases work differently from click releases: keys
18 |     are stateful and must be explicitly released, while the state of
19 |     the mouse buttons is provided at each timestep, so you have to
20 |     explicitly keep the mouse down.
21 | 
22 |     Attributes:
23 |         keys (list<KeyEvent>): The allowed key presses
24 |         buttonmasks (list<int>): The allowed buttonmasks (i.e. mouse presses)
25 |         screen_shape (int, int): The X and Y dimensions of the screen
26 |     """
27 | 
28 |     def __init__(self, keys=None, buttonmasks=None, screen_shape=(1024, 728)):
29 |         self.keys = []
30 |         if keys is None:
31 |             keys = [c for c in string.printable] + list(constants.KEYMAP.keys())
32 |         for key in (keys or []):
33 |             down = vnc_event.KeyEvent.by_name(key, down=True)
34 |             up = vnc_event.KeyEvent.by_name(key, down=False)
35 |             self.keys.append(down)
36 |             self.keys.append(up)
37 |         self._key_set = set(self.keys)
38 | 
39 |         self.screen_shape = screen_shape
40 |         if self.screen_shape is not None:
41 |             self.buttonmasks = []
42 |             if buttonmasks is None:
43 |                 buttonmasks = range(256)
44 |             for buttonmask in buttonmasks:
45 |                 self.buttonmasks.append(buttonmask)
46 |             self._buttonmask_set = set(self.buttonmasks)
47 | 
48 |     def contains(self, action):
49 |         if not isinstance(action, list):
50 |             return False
51 | 
52 |         for a in action:
53 |             if isinstance(a, vnc_event.KeyEvent):
54 |                 if a not in self._key_set:
55 |                     return False
56 |             elif isinstance(a, vnc_event.PointerEvent):
57 |                 if self.screen_shape is None:
58 |                     return False
59 | 
60 |                 if a.x < 0 or a.x > self.screen_shape[0]:
61 |                     return False
62 |                 elif a.y < 0 or a.y > self.screen_shape[1]:
63 |                     return False
64 |                 elif a.buttonmask not in self._buttonmask_set:
65 |                     return False
66 | 
67 |         return True
68 | 
69 |     def sample(self):
70 |         # Both key and pointer allowed
71 |         if self.screen_shape is not None:
72 |             event_type = prng.np_random.randint(2)
73 |         else:
74 |             event_type = 0
75 | 
76 |         if event_type == 0:
77 |             # Let's press a key
78 |             key = prng.np_random.choice(self.keys)
79 |             event = [key]
80 |         else:
81 |             x = prng.np_random.randint(self.screen_shape[0])
82 |             y = prng.np_random.randint(self.screen_shape[1])
83 |             buttonmask = prng.np_random.choice(self.buttonmasks)
84 | 
85 |             event = [vnc_event.PointerEvent(x, y, buttonmask)]
86 |         return event
87 | 


--------------------------------------------------------------------------------
/universe/rewarder/tests/test_reward_buffer.py:
--------------------------------------------------------------------------------
 1 | from universe.rewarder import reward_buffer
 2 | 
 3 | def test_prereset():
 4 |     buf = reward_buffer.RewardBuffer('buf')
 5 |     buf.push('1', 2, False, {'key': 'value'})
 6 |     reward, done, info = buf.pop()
 7 |     assert reward == 0
 8 |     assert done is False
 9 |     print(info)
10 | 
def test_mask_peek():
    """Peeking without a prior reset reports None for all env_status ids and states."""
    rewards = reward_buffer.RewardBuffer('buf')
    rewards.set_env_info('running', 'test-v0', '1', fps=60)
    rewards.push('1', 1, False, {'key': 'value'})

    reward, done, info = rewards.pop(peek=True)

    assert info['env_status.episode_id'] is None
    assert info['env_status.env_state'] is None
    assert info['env_status.peek.episode_id'] is None
    assert info['env_status.peek.env_state'] is None
20 | 
def test_single():
    """After a reset, one pushed reward pops back with its info and episode id."""
    rewards = reward_buffer.RewardBuffer('buf')
    rewards.reset('1')
    rewards.push('1', 1, False, {'key': 'value'})

    reward, done, info = rewards.pop()

    assert reward == 1.0
    assert done is False
    assert info['key'] == 'value'
    assert info['env_status.episode_id'] == '1'
    assert info['env_status.reset.episode_id'] == '1'
    assert info['env.text'] == []
32 | 
def test_multiple():
    """Two episodes queued back to back are popped one episode at a time.

    The first pop carries the old episode's reward/info with done True while
    already reporting the new episode id and its text; the second pop carries
    the new episode's reward/info.
    """
    buf = reward_buffer.RewardBuffer('buf')
    buf.reset('1')
    buf.push('1', 1, False, {'key': 'value1'})
    buf.push_text('1', 'text1')

    buf.push('2', 2, False, {'key': 'value2'})
    buf.push_text('2', 'text2')
    buf.push_text('2', 'text3')
    reward, done, info = buf.pop()
    assert reward == 1.0 # old
    assert done is True # old
    assert info['key'] == 'value1', 'Info: {}'.format(info) # old
    # The failure message must name the value actually being compared ('2').
    assert info['env_status.episode_id'] == '2', 'got: {}, expected: {}'.format(info['env_status.episode_id'], '2')
    assert info['env_status.complete.episode_id'] == '1'
    assert info['env_status.reset.episode_id'] == '1'
    assert info['env.text'] == ['text2', 'text3'] # new

    reward, done, info = buf.pop()
    assert reward == 2.0 # new
    assert done is False
    assert info['key'] == 'value2'
    assert info['env_status.episode_id'] == '2'
    assert 'env_status.reset.episode_id' not in info
    assert info['env.text'] == []
58 | 
def test_double_reset():
    """Peeks keep reporting episode '1' while exposing episode '2' as the peek state."""
    rewards = reward_buffer.RewardBuffer('buf')
    rewards.reset('1')
    rewards.set_env_info('running', 'test-v0', '1', fps=60)
    rewards.push('1', 1, False, {'key': 'value1'})
    rewards.set_env_info('resetting', 'test-v0', '2', fps=60)
    rewards.push('2', 20, False, {'key': 'value2'})

    def check_peek(expected_peek_state):
        # Both peeks share every expectation except the peeked env state.
        reward, done, info = rewards.pop(peek=True)
        assert reward == 0
        assert done == False
        assert 'env_status.artificial.done' not in info
        assert info['env_status.episode_id'] == '1'
        assert info['env_status.env_state'] == 'running'
        assert info['env_status.peek.episode_id'] == '2'
        assert info['env_status.peek.env_state'] == expected_peek_state

    check_peek('resetting')

    rewards.set_env_info('running', 'test-v0', '2', fps=60)

    check_peek('running')
86 | 


--------------------------------------------------------------------------------
/universe/remotes/compose/progress_stream.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import unicode_literals
  3 | 
  4 | from universe.remotes.compose import utils
  5 | 
  6 | 
  7 | class StreamOutputError(Exception):
  8 |     pass
  9 | 
 10 | 
 11 | def stream_output(output, stream):
 12 |     is_terminal = hasattr(stream, 'isatty') and stream.isatty()
 13 |     stream = utils.get_output_stream(stream)
 14 |     all_events = []
 15 |     lines = {}
 16 |     diff = 0
 17 | 
 18 |     for event in utils.json_stream(output):
 19 |         all_events.append(event)
 20 |         is_progress_event = 'progress' in event or 'progressDetail' in event
 21 | 
 22 |         if not is_progress_event:
 23 |             print_output_event(event, stream, is_terminal)
 24 |             stream.flush()
 25 |             continue
 26 | 
 27 |         if not is_terminal:
 28 |             continue
 29 | 
 30 |         # if it's a progress event and we have a terminal, then display the progress bars
 31 |         image_id = event.get('id')
 32 |         if not image_id:
 33 |             continue
 34 | 
 35 |         if image_id in lines:
 36 |             diff = len(lines) - lines[image_id]
 37 |         else:
 38 |             lines[image_id] = len(lines)
 39 |             stream.write("\n")
 40 |             diff = 0
 41 | 
 42 |         # move cursor up `diff` rows
 43 |         stream.write("%c[%dA" % (27, diff))
 44 | 
 45 |         print_output_event(event, stream, is_terminal)
 46 | 
 47 |         if 'id' in event:
 48 |             # move cursor back down
 49 |             stream.write("%c[%dB" % (27, diff))
 50 | 
 51 |         stream.flush()
 52 | 
 53 |     return all_events
 54 | 
 55 | 
 56 | def print_output_event(event, stream, is_terminal):
 57 |     if 'errorDetail' in event:
 58 |         raise StreamOutputError(event['errorDetail']['message'])
 59 | 
 60 |     terminator = ''
 61 | 
 62 |     if is_terminal and 'stream' not in event:
 63 |         # erase current line
 64 |         stream.write("%c[2K\r" % 27)
 65 |         terminator = "\r"
 66 |     elif 'progressDetail' in event:
 67 |         return
 68 | 
 69 |     if 'time' in event:
 70 |         stream.write("[%s] " % event['time'])
 71 | 
 72 |     if 'id' in event:
 73 |         stream.write("%s: " % event['id'])
 74 | 
 75 |     if 'from' in event:
 76 |         stream.write("(from %s) " % event['from'])
 77 | 
 78 |     status = event.get('status', '')
 79 | 
 80 |     if 'progress' in event:
 81 |         stream.write("%s %s%s" % (status, event['progress'], terminator))
 82 |     elif 'progressDetail' in event:
 83 |         detail = event['progressDetail']
 84 |         total = detail.get('total')
 85 |         if 'current' in detail and total:
 86 |             percentage = float(detail['current']) / float(total) * 100
 87 |             stream.write('%s (%.1f%%)%s' % (status, percentage, terminator))
 88 |         else:
 89 |             stream.write('%s%s' % (status, terminator))
 90 |     elif 'stream' in event:
 91 |         stream.write("%s%s" % (event['stream'], terminator))
 92 |     else:
 93 |         stream.write("%s%s\n" % (status, terminator))
 94 | 
 95 | 
 96 | def get_digest_from_pull(events):
 97 |     for event in events:
 98 |         status = event.get('status')
 99 |         if not status or 'Digest' not in status:
100 |             continue
101 | 
102 |         _, digest = status.split(':', 1)
103 |         return digest.strip()
104 |     return None
105 | 
106 | 
def get_digest_from_push(events):
    """Return the first truthy ``event['aux']['Digest']``, or None if absent."""
    digests = (event.get('aux', {}).get('Digest') for event in events)
    return next((digest for digest in digests if digest), None)
113 | 


--------------------------------------------------------------------------------
/universe/vncdriver/screen/pyglet_screen.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import numpy as np
 3 | import os
 4 | from universe import pyprofile
 5 | import sys
 6 | 
 7 | from universe import error
 8 | from universe.vncdriver import server_messages
 9 | 
10 | logger = logging.getLogger(__name__)
11 | 
class PygletScreen(object):
    """Pyglet window that displays a framebuffer through a texture.

    Rectangles are blitted into the texture as they arrive; the window is
    only redrawn by ``flip`` and only when something changed since the last
    redraw.
    """

    def __init__(self, bitmap=None):
        self._window = None
        self._is_updated = False
        # bitmap.shape is unpacked as (height, width, <ignored>).
        rows, cols, _ = bitmap.shape
        self._height = rows
        self._width = cols
        self._initialize()
        self.update_rectangle(0, 0, self._width, self._height, bitmap)

    def flip(self):
        """Redraw the window from the texture if anything was updated."""
        if self._is_updated:
            self._is_updated = False

            window = self._window
            window.clear()
            window.switch_to()
            window.dispatch_events()
            self.texture.blit(0, 0)
            window.flip()

    def _initialize(self):
        """Create the window and its backing texture."""
        on_linux = sys.platform.startswith('linux')
        if on_linux and not os.environ.get('DISPLAY'):
            raise error.Error("Cannot render with mode='human' with no DISPLAY variable set.")

        # Imported lazily so the module can be loaded without pyglet.
        import pyglet
        self._window = pyglet.window.Window(width=self._width, height=self._height, visible=True)
        self._window.dispatch_events()
        self.texture = pyglet.image.Texture.create(width=self._width, height=self._height)

    def update_rectangle(self, x, y, width, height, data):
        """Blit ``data`` into the texture at (x, y) and mark the screen dirty."""
        raw = data.tobytes()
        pyprofile.incr('vncdriver.pyglet_screen.blit')
        pyprofile.incr('vncdriver.pyglet_screen.blit.bytes', len(raw), unit=pyprofile.BYTES)
        import pyglet
        # Negative pitch: rows are supplied top-to-bottom.
        image = pyglet.image.ImageData(width, height, 'RGB', raw, pitch=-3 * width)
        # y is flipped because the texture origin is at the bottom.
        self.texture.blit_into(image, x, self._height - height - y, 0)
        self._is_updated = True

    def apply(self, framebuffer_update):
        """Blit every rectangle of ``framebuffer_update`` into the texture.

        Raises error.Error for a rectangle whose encoding is not RAW, ZRLE
        or Zlib.
        """
        pyprofile.push('vncdriver.pyglet_screen.apply')
        for rect in framebuffer_update.rectangles:
            if not isinstance(rect.encoding,
                              (server_messages.RAWEncoding, server_messages.ZRLEEncoding, server_messages.ZlibEncoding)):
                raise error.Error('Unrecognized encoding: {}'.format(rect.encoding))
            self.update_rectangle(rect.x, rect.y, rect.width, rect.height, rect.encoding.data)
        pyprofile.pop()
58 | 
59 | 
60 | 
61 |     # # TODO: we don't seem to be able to have multiple independent
62 |     # # windows at once
63 |     # def update_rectangle(self, x, y, width, height, data):
64 |     #     self._update_rgbarray(x, y, width, height, update)
65 | 
66 | 
67 |     # def copy_rectangle(self, src_x, src_y, x, y, width, height):
68 |     #     assert self._window
69 |     #     rectangle = self.texture.get_region(src_x, self._height-height-src_y, width, height)
70 |     #     self.texture.blit_into(rectangle.get_image_data(), x, self._height-height-y, 0)
71 | 
72 |     # def fill_rectangle(self, x, y, width, height, color):
73 |     #     import pyglet
74 |     #     # While this technically works, it's super slow
75 |     #     update = np.frombuffer(color, dtype=np.uint8)
76 |     #     r, g, b = update[self._color_cycle]
77 |     #     image_pattern = pyglet.image.SolidColorImagePattern(color=(r, g, b, 0))
78 |     #     image = image_pattern.create_image(width, height)
79 |     #     self.texture.blit_into(image, x, self._height-height-y, 0)
80 | 
81 |     # def commit(self):
82 |     #     self._window.clear()
83 |     #     self._window.switch_to()
84 |     #     self.texture.blit(0, 0)
85 | 
86 |     #     self._is_updated = True
87 | 


--------------------------------------------------------------------------------
/universe/wrappers/monitoring.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import gym
 4 | from universe.vectorized import core  # Cannot import vectorized directly without inducing a cycle
 5 | from universe.wrappers.time_limit import TimeLimit
 6 | 
 7 | logger = logging.getLogger(__name__)
 8 | 
 9 | class _UniverseMonitor(core.Wrapper):
10 |     def __init__(self, env, directory, video_callable=None, force=False,
11 |                  resume=False, write_upon_reset=False, uid=None, mode=None):
12 |         super(_UniverseMonitor, self).__init__(env)
13 |         self.directory = directory
14 |         self.video_callable = video_callable
15 |         self.force = force
16 |         self.resume = resume
17 |         self.write_upon_reset = write_upon_reset
18 |         self.uid = uid
19 |         self.mode = mode
20 |         # TODO if we want to monitor more than one instance in a vectorized
21 |         # env we'll have to do this after configure()
22 |         self._start_monitor()
23 | 
24 |     def _start_monitor(self):
25 |         logger.info("Starting Monitor. Writing monitor logs to {}".format(self.directory))
26 | 
27 |         # Circular dependencies :(
28 |         from universe import wrappers
29 |         # We need to maintain pointers to these to avoid them being
30 |         # GC'd. They have a weak reference to us to avoid cycles.
31 |         # TODO if we want to monitor more than one instance in a vectorized
32 |         # env we'll need to actually fix WeakUnvectorize
33 |         self._unvectorized_envs = [wrappers.WeakUnvectorize(self.env, i) for i in range(1)]
34 | 
35 |         # For now we only monitor the first env
36 |         if hasattr(gym, 'wrappers'):
37 |             self._monitor = gym.wrappers.Monitor(self._unvectorized_envs[0],
38 |                 directory=self.directory,
39 |                 video_callable=self.video_callable,
40 |                 force=self.force,
41 |                 resume=self.resume,
42 |                 write_upon_reset=self.write_upon_reset,
43 |                 uid=self.uid,
44 |                 mode=self.mode
45 |             )
46 |         else:
47 |             logger.warn("DEPRECATION WARNING: You are using an older version of gym that has a deprecated Monitor, please update to gym:v0.8.0. This change was made 2017/02/01 and is included in universe version 0.21.3")
48 |             from gym import monitoring
49 |             self._monitor = monitoring.MonitorManager(self._unvectorized_envs[0])
50 |             self._monitor.start(
51 |                 self.directory,
52 |                 self.video_callable,
53 |                 self.force,
54 |                 self.resume,
55 |                 self.write_upon_reset,
56 |                 self.uid,
57 |                 self.mode
58 |             )
59 | 
60 |     def _step(self, action_n):
61 |         self._monitor._before_step(action_n[0])
62 |         observation_n, reward_n, done_n, info = self.env.step(action_n)
63 |         done_n[0] = self._monitor._after_step(observation_n[0], reward_n[0], done_n[0], info)
64 |         return observation_n, reward_n, done_n, info
65 | 
66 |     def _reset(self):
67 |         self._monitor._before_reset()
68 |         observation_n = self.env.reset()
69 |         self._monitor._after_reset(observation_n[0])
70 |         return observation_n
71 | 
72 |     def _close(self):
73 |         super(_UniverseMonitor, self)._close()
74 |         self._monitor.close()
75 | 
76 |     def set_monitor_mode(self, mode):
77 |         logger.info("Setting the monitor mode is deprecated and will be removed soon")
78 |         self._monitor._set_mode(mode)
79 | 
def Monitor(env, directory, video_callable=None, force=False, resume=False,
            write_upon_reset=False, uid=None, mode=None):
    """Wrap ``env`` in a TimeLimit and then in a _UniverseMonitor.

    Every argument other than ``env`` is handed to _UniverseMonitor as is.
    """
    time_limited_env = TimeLimit(env)
    return _UniverseMonitor(
        time_limited_env,
        directory,
        video_callable=video_callable,
        force=force,
        resume=resume,
        write_upon_reset=write_upon_reset,
        uid=uid,
        mode=mode,
    )
84 | 


--------------------------------------------------------------------------------
/doc/env_semantics.rst:
--------------------------------------------------------------------------------
 1 | Environment semantics
 2 | *********************
 3 |   
 4 | Real-time environments
 5 | ======================
 6 | 
 7 | Universe environments differ from other Gym environments in that the
 8 | environment keeps running in real-time, even when the agent does not
 9 | call ``step``. This has a few important implications:
10 | 
11 | * Actions and observations can no longer be considered to
12 |   occur on a "clock tick".
13 | * An explicit call to ``reset`` is asynchronous and returns
14 |   immediately, even though the environment has not yet finished
15 |   resetting. (If you would prefer the ``reset`` call to block
16 |   until the reset has finished, you can wrap
17 |   the client-side environment with a `BlockingReset <https://github.com/openai/universe/blob/master/universe/wrappers/blocking_reset.py>`__ wrapper)
18 | * Since the environment will not have waited to finish
19 |   connecting to the VNC server before returning, the initial return
20 |   values from ``reset`` will be ``None`` to indicate that there is
21 |   not yet a valid observation.
22 | * An agent that successfully learns from a Universe environment cannot
23 |   take "thinking breaks": it must keep sending actions to the
24 |   environment at all times.
25 | * Lag and latency play a major role in your agent's ability to
26 |   successfully learn in a given environment. The latency and profiling
27 |   numbers returned in the ``info`` dictionary can provide important
28 |   information for training.
29 | 
30 | Vectorized API
31 | ==============
32 | 
33 | The vectorized Gym API allows a single client-side environment to
34 | control a vector of remotes. The main difference with the
35 | non-vectorized Gym API is that individual environments will
36 | automatically reset upon reaching the end of an episode. (An episode
37 | is defined as ending when an agent has concretely succeeded or failed
38 | at the task, such as after clearing a level of a game, or losing the
39 | game. Some environments without clearly delineated success and
40 | failure conditions may not have episodes.)
41 | 
42 | There are two API methods, ``reset`` and ``step``. The semantics are:
43 | 
44 | - ``reset`` takes no arguments and returns a vector of observations:
45 | 
46 | .. code:: python
47 | 
48 |   observation_n = env.reset()
49 | 
50 | - ``step`` consumes a vector of actions, and returns a vector of
51 |   observations, vector of rewards, vector of done booleans, and an
52 |   info dictionary. The info dictionary has an ``n`` key, which
53 |   contains a vector of infos specific to each env:
54 | 
55 | .. code:: python
56 | 
57 |   observation_n, reward_n, done_n, info = env.step(action_n)
58 |   # len(info['n']) == len(observation_n)
59 | 
60 | Some important notes:
61 | 
62 | - At any given moment, some of the environments may be
63 |   resetting. Resetting environments will have a ``None`` value for
64 |   their observation. For example, an ``observation_n`` of ``[None,
65 |   {'vision': ...}, {'vision': ...}]`` indicates that the environment
66 |   at index 0 is resetting.
67 | - When an index returns ``done=True``, the corresponding environment
68 |   will automatically start resetting.
69 | - The user must call ``reset`` once before calling ``step``; undefined
70 |   behavior will result if ``reset`` is not called. Further ``reset``
71 |   calls are allowed, but generally are used only if the environment has
72 |   been idle for a while (such as with periodic evaluation), or when it
73 |   is important to start at the beginning of an episode.
74 | 
75 | Versioning
76 | ==========
77 | 
78 | The remote is versioned and has fixed semantics, assuming sufficient
79 | compute resources are applied (i.e. if you don't have enough CPU, your
80 | flash environments will likely behave differently). The client's exact
81 | semantics will depend on the version of universe you have installed,
82 | and you should track the version of that together with the rest of
83 | your agent code.
84 | 
85 | 


--------------------------------------------------------------------------------
/example/recorders/botaction_recorder.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 |   This is a small server that accepts connections on a websocket port and writes it to a file.
 4 | 
 5 |   The purpose is to allow a universe-env with a built-in bot to record the actions it's taking
 6 |   as a demonstration. So the demonstration includes a botactions.jsonl file that gets used instead
 7 |   of the vnc client log. (The vnc client log is still recorded and needed to fully parse the VNC
 8 |   protocol.)
 9 | 
10 |   It's much simpler than reward_recorder.py, because it doesn't have to also talk to the agent.
11 |   It just takes json messages over a websocket and appends them separated by newlines to the log file.
12 | 
13 |   The ws listen address is 127.0.0.1:15896 unless overridden with --listen-address
14 |   The log file is /tmp/demo/botactions.jsonl unless overridden with --botaction-logfile
15 | """
16 | import argparse
17 | import logging
18 | import sys
19 | import json
20 | from autobahn.twisted import websocket
21 | from universe.twisty import reactor
22 | logger = logging.getLogger()
23 | 
class BotactionRecordingServer(websocket.WebSocketServerProtocol, object):
    """Websocket protocol that appends each received JSON message to a log file.

    One instance exists per connection. The log file at ``logfile_path`` is
    opened in write mode when a client connects and closed when it
    disconnects.
    """

    # Destination file; main() overrides this from the command line.
    logfile_path='/tmp/demo/botactions.jsonl'

    # Counter backing next_id(); shared by all instances.
    _next_id = 1

    @classmethod
    def next_id(cls):
        """Return the current session id and advance the class-wide counter."""
        allocated = cls._next_id
        cls._next_id = allocated + 1
        return allocated

    def __init__(self):
        super(BotactionRecordingServer, self).__init__()
        self.id = self.next_id()
        self._closed = False
        self.file = None

        logger.info("[BotactionRecordingServer] [%d] Wrote version number", self.id)

    def _emit(self, rec):
        """Write ``rec`` as one JSON line and flush; no-op without an open file."""
        if not self.file:
            return
        self.file.write(json.dumps(rec) + '\n')
        self.file.flush()

    def onConnect(self, request):
        """Open the log file and write the header record."""
        logger.info('[BotactionRecordingServer] [%d] Client connecting: %s. Writing to %s', self.id, request.peer, self.logfile_path)
        self.file = open(self.logfile_path, 'w', encoding='utf-8')
        header = {
            'version': 1,
            'session_id': self.id,
            '_debug_version': '0.0.1',  # Give this an internal version for debugging corrupt reward.demo files # TODO, pull this from setup.py or the host docker image
        }
        self._emit(header)

    def onOpen(self):
        logger.info("[BotactionRecordingServer] [%d] Websocket connection established", self.id)

    def onClose(self, wasClean, code, reason):
        """Close the log file, if one is open, and mark the session closed."""
        logger.info('[BotactionRecordingServer] [%d] Client connection closed: %s', self.id, reason)
        if self.file:
            self.file.close()
            self.file = None

        self._closed = True

    def onMessage(self, msg, binary):
        """Decode ``msg`` as UTF-8 JSON and append it to the log."""
        logger.debug('[BotactionRecordingServer] [%d] Received message from client: %s', self.id, msg)

        record = json.loads(msg.decode('utf-8'))
        self._emit(record)
72 | 
def main():
    """Parse the command line, then serve BotactionRecordingServer until the reactor stops.

    Returns:
        0, used as the process exit status by the ``__main__`` guard.

    Raises:
        ValueError: if ``--listen-address`` is not of the form ``host:port``
            with an integer port.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-l', '--listen-address', default='127.0.0.1:15896', help='Address to listen on')
    parser.add_argument('-o', '--botaction-logfile', default='/tmp/demo/botactions.jsonl', help='Filename for timestamped log of bot actions.')
    args = parser.parse_args()

    BotactionRecordingServer.logfile_path = args.botaction_logfile

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    factory = websocket.WebSocketServerFactory()
    factory.protocol = BotactionRecordingServer

    # Split on the last colon so only the trailing component is the port.
    host, port = args.listen_address.rsplit(':', 1)
    port = int(port)
    logger.info('Listening on %s:%s', host, port)
    # Bind to the requested host. Without ``interface`` the server listens on
    # every interface, ignoring the host that was parsed and logged above.
    # An empty host (e.g. ":15896") still means all interfaces.
    reactor.listenTCP(port, factory, interface=host)
    reactor.run()
    return 0
96 | 
97 | if __name__ == '__main__':
98 |     sys.exit(main())
99 | 


--------------------------------------------------------------------------------
/universe/vncdriver/vnc_session.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | 
  3 | from twisted.internet import defer, endpoints
  4 | 
  5 | from universe import error, utils
  6 | from universe.twisty import reactor
  7 | from universe.vncdriver import screen, vnc_client
  8 | 
  9 | logger = logging.getLogger(__name__)
 10 | 
 11 | class VNCSession(object):
 12 |     def __init__(self, remotes, error_buffer):
 13 |         self.remotes = remotes
 14 |         self.error_buffer = error_buffer
 15 |         self._pyglet_screen = None
 16 |         self.connect()
 17 | 
 18 |     def connect(self):
 19 |         utils.blockingCallFromThread(self._connect)
 20 | 
 21 |     def _connect(self):
 22 |         deferreds = []
 23 | 
 24 |         for i, remote in enumerate(self.remotes):
 25 |             d = defer.Deferred()
 26 |             deferreds.append(d)
 27 | 
 28 |             factory = vnc_client.client_factory(d, self.error_buffer)
 29 |             factory.rewarder_session = self
 30 |             factory.label = 'vnc:{}:{}'.format(i, remote)
 31 |             endpoint = endpoints.clientFromString(reactor, 'tcp:'+remote)
 32 | 
 33 |             def success(i):
 34 |                 logger.info('[%s] VNC connection established', factory.label)
 35 | 
 36 |             def fail(reason):
 37 |                 reason = error.Error('[{}] Connection failed: {}'.format(factory.label, reason.value))
 38 |                 try:
 39 |                     d.errback(utils.format_error(reason))
 40 |                 except defer.AlreadyCalledError:
 41 |                     pass
 42 |             endpoint.connect(factory).addCallback(success).addErrback(fail)
 43 | 
 44 |         d = defer.DeferredList(deferreds, fireOnOneErrback=True)
 45 | 
 46 |         def success(results):
 47 |             # Store the _clients list when connected
 48 |             self._clients = [client for success, client in results]
 49 |         d.addCallback(success)
 50 |         return d
 51 | 
 52 |     def flip(self):
 53 |         observation_n = []
 54 |         info_n = []
 55 |         for i, client in enumerate(self._clients):
 56 |             observation, info = client.numpy_screen.flip()
 57 |             updates = info['vnc_session.framebuffer_updates']
 58 | 
 59 |             # Keep the pyglet screen fed, but don't flip it until the user calls render
 60 |             if i == 0 and self._pyglet_screen:
 61 |                 for update in updates:
 62 |                     self._pyglet_screen.apply(update)
 63 | 
 64 |             observation_n.append(observation)
 65 |             info_n.append({'vnc.updates.n': len(updates)})
 66 | 
 67 |         return observation_n, info_n
 68 | 
 69 |     def peek(self):
 70 |         observations = [client.numpy_screen.peek() for client in self._clients]
 71 |         return observations
 72 | 
 73 |     def step(self, action):
 74 |         reactor.callFromThread(self._step, action)
 75 |         return self.flip()
 76 | 
 77 |     def _step(self, action):
 78 |         try:
 79 |             for a, client in zip(action, self._clients):
 80 |                 for event in a:
 81 |                     if event[0] == 'KeyEvent':
 82 |                         key, down = event[1:]
 83 |                         client.send_KeyEvent(key, down)
 84 |                     elif event[0] == 'PointerEvent':
 85 |                         x, y, buttomask = event[1:]
 86 |                         client.send_PointerEvent(x, y, buttomask)
 87 |                     else:
 88 |                         raise error.Error('Bad event type: {}'.format(type))
 89 |         except Exception as e:
 90 |             self.error_buffer.record(e)
 91 | 
 92 |     def render(self):
 93 |         if not self._pyglet_screen:
 94 |             start = self.peek()[0]
 95 |             self._pyglet_screen = screen.PygletScreen(start)
 96 |         self._pyglet_screen.flip()
 97 | 
 98 |     def close(self):
 99 |         utils.blockingCallFromThread(self._close)
100 | 
101 |     def _close(self):
102 |         if getattr(self, '_clients', None) is not None:
103 |             for client in self._clients:
104 |                 client.close()
105 |             self._clients = None
106 | 


--------------------------------------------------------------------------------
/universe/wrappers/gym_core_sync.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | import logging
 3 | from universe import rewarder, spaces, vectorized
 4 | 
 5 | logger = logging.getLogger(__name__)
 6 | 
 7 | class GymCoreSync(vectorized.Wrapper):
 8 |     """A synchronized version of the core envs. Its semantics should match
 9 |     that of the core envs. (By default, observations are pixels from
10 |     the VNC session, but it also supports receiving the normal Gym
11 |     observations over the rewarder protocol.)
12 | 
13 |     Provided primarily for testing and debugging.
14 |     """
15 | 
16 |     def __init__(self, env):
17 |         super(GymCoreSync, self).__init__(env)
18 |         self.reward_n = None
19 |         self.done_n = None
20 |         self.info = None
21 | 
22 |         # Metadata has already been cloned
23 |         self.metadata['semantics.async'] = False
24 | 
25 |     def _reset(self):
26 |         observation_n = self.env.reset()
27 |         new_observation_n, self.reward_n, self.done_n, self.info = self.env.step([[] for i in range(self.n)])
28 |         rewarder.merge_observation_n(observation_n, new_observation_n)
29 | 
30 |         # Fast forward until the observation is caught up with the rewarder
31 |         self._flip_past(observation_n, self.reward_n, self.done_n, self.info)
32 | 
33 |         assert all(r == 0 for r in self.reward_n), "Unexpectedly received rewards during reset phase: {}".format(self.reward_n)
34 |         return observation_n
35 | 
36 |     def _step(self, action_n):
37 |         # Add C keypress in order to "commit" the action, as
38 |         # interpreted by the remote.
39 |         action_n = [action + [
40 |             spaces.KeyEvent.by_name('c', down=True),
41 |             spaces.KeyEvent.by_name('c', down=False)
42 |         ] for action in action_n]
43 | 
44 |         observation_n, reward_n, done_n, info = self.env.step(action_n)
45 |         if self.reward_n is not None:
46 |             rewarder.merge_n(
47 |                 observation_n, reward_n, done_n, info,
48 |                 [None] * self.n, self.reward_n, self.done_n, self.info,
49 |             )
50 |             self.reward_n = self.done_n = self.info = None
51 | 
52 |         while True:
53 |             count = len([True for info_i in info['n'] if info_i['stats.reward.count'] == 0])
54 |             if count > 0:
55 |                 logger.debug('[GymCoreSync] Still waiting on %d envs to receive their post-commit reward', count)
56 |             else:
57 |                 break
58 | 
59 |             new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for i in range(self.n)])
60 |             rewarder.merge_n(
61 |                 observation_n, reward_n, done_n, info,
62 |                 new_observation_n, new_reward_n, new_done_n, new_info
63 |             )
64 | 
65 |         assert all(info_i['stats.reward.count'] == 1 for info_i in info['n']), "Expected all stats.reward.counts to be 1: {}".format(info)
66 | 
67 |         # Fast forward until the observation is caught up with the rewarder
68 |         self._flip_past(observation_n, reward_n, done_n, info)
69 |         return observation_n, reward_n, done_n, info
70 | 
71 |     def _flip_past(self, observation_n, reward_n, done_n, info):
72 |         # Wait until all observations are past the corresponding reset times
73 |         remote_target_time = [info_i['reward_buffer.remote_time'] for info_i in info['n']]
74 |         while True:
75 |             new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for i in range(self.n)])
76 | 
77 |             # info_i.get['diagnostics.image_remote_time'] may not exist, for example when an env
78 |             # is resetting. target is a timestamp, thus > 0, so these will count as "need to catch up"
79 |             deltas = [target - info_i.get('diagnostics.image_remote_time', 0) for target, info_i in zip(remote_target_time, new_info['n'])]
80 |             count = len([d for d in deltas if d > 0])
81 | 
82 |             rewarder.merge_n(
83 |                 observation_n, reward_n, done_n, info,
84 |                 new_observation_n, new_reward_n, new_done_n, new_info
85 |             )
86 | 
87 |             if count == 0:
88 |                 return
89 |             else:
90 |                 logger.debug('[GymCoreSync] Still waiting on %d envs to catch up to their targets: %s', count, deltas)
91 | 


--------------------------------------------------------------------------------
/universe/rewarder/env_status.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import threading
  3 | 
  4 | logger = logging.getLogger()
  5 | 
  6 | def parse_episode_id(episode_id):
  7 |     if episode_id is None:
  8 |         return -1
  9 |     return int(episode_id)
 10 | 
 11 | def generate_episode_id(parsed):
 12 |     if parsed == -1:
 13 |         return None
 14 |     return str(parsed)
 15 | 
 16 | def compare_ids(a, b):
 17 |     if a == b:
 18 |         return 0
 19 |     elif a is None:
 20 |         return -1
 21 |     elif b is None:
 22 |         return 1
 23 |     elif parse_episode_id(a) < parse_episode_id(b):
 24 |         return -1
 25 |     else:
 26 |         return 1
 27 | 
 28 | class EnvStatus(object):
 29 |     def __init__(self, label=None, primary=True):
 30 |         self.cv = threading.Condition()
 31 |         self._env_id = None
 32 |         self._env_state = None
 33 |         self._episode_id = '0'
 34 |         self._fps = None
 35 |         self.label = label or 'EnvStatus'
 36 |         self.primary = primary
 37 | 
 38 |     def env_info(self):
 39 |         with self.cv:
 40 |             return {
 41 |                 'env_state': self._env_state,
 42 |                 'env_id': self._env_id,
 43 |                 'episode_id': self._episode_id,
 44 |                 'fps': self._fps,
 45 |             }
 46 | 
 47 |     def set_env_info(self, env_state=None, env_id=None, episode_id=None, bump_past=None, fps=None):
 48 |         """Atomically set the environment state tracking variables.
 49 |         """
 50 |         with self.cv:
 51 |             if env_id is None:
 52 |                 env_id = self._env_id
 53 |             if env_state is None:
 54 |                 env_state = self._env_state
 55 |             if fps is None:
 56 |                 fps = self._fps
 57 |             self.cv.notifyAll()
 58 | 
 59 |             old_episode_id = self._episode_id
 60 |             if self.primary:
 61 |                 current_id = parse_episode_id(self._episode_id)
 62 |                 # Bump when changing from resetting -> running
 63 |                 if bump_past is not None:
 64 |                     bump_past_id = parse_episode_id(bump_past)
 65 |                     current_id = max(bump_past_id+1, current_id+1)
 66 |                 elif env_state == 'resetting':
 67 |                     current_id += 1
 68 |                 self._episode_id = generate_episode_id(current_id)
 69 |                 assert self._fps or fps
 70 |             elif episode_id is False:
 71 |                 # keep the same episode_id: this is just us proactive
 72 |                 # setting the state to resetting after a done=True
 73 |                 pass
 74 |             else:
 75 |                 assert episode_id is not None, "No episode_id provided. This likely indicates a misbehaving server, which did not send an episode_id"
 76 |                 self._episode_id = episode_id
 77 |             self._fps = fps
 78 |             logger.info('[%s] Changing env_state: %s (env_id=%s) -> %s (env_id=%s) (episode_id: %s->%s, fps=%s)', self.label, self._env_state, self._env_id, env_state, env_id, old_episode_id, self._episode_id, self._fps)
 79 |             self._env_state = env_state
 80 |             if env_id is not None:
 81 |                 self._env_id = env_id
 82 | 
 83 |             return self.env_info()
 84 | 
 85 |     @property
 86 |     def episode_id(self):
 87 |         with self.cv:
 88 |             return self._episode_id
 89 | 
 90 |     @property
 91 |     def env_state(self):
 92 |         with self.cv:
 93 |             return self._env_state
 94 | 
 95 |     @env_state.setter
 96 |     def env_state(self, value):
 97 |         # TODO: Validate env_state
 98 |         self.set_env_info(value)
 99 | 
100 |     @property
101 |     def env_id(self):
102 |         with self.cv:
103 |             return self._env_id
104 | 
105 |     @env_id.setter
106 |     def env_id(self, value):
107 |         self.set_env_info(None, env_id=value)
108 | 
109 |     @property
110 |     def fps(self):
111 |         with self.cv:
112 |             return self._fps
113 | 
114 |     def wait_for_env_state_change(self, start_state):
115 |         with self.cv:
116 |             while True:
117 |                 if self._env_state != start_state:
118 |                     return self.env_info()
119 |                 self.cv.wait(timeout=10)
120 | 


--------------------------------------------------------------------------------
/universe/runtimes/registration.py:
--------------------------------------------------------------------------------
  1 | import collections
  2 | import json
  3 | 
  4 | import six
  5 | from gym import error
  6 | 
  7 | 
  8 | class UnregisteredRuntime(error.Unregistered):
  9 |     """Raised when the user requests a runtime from the registry that does
 10 |     not actually exist.
 11 |     """
 12 |     pass
 13 | 
 14 | class DockerRuntime(object):
 15 |     """Lightweight struct for our DockerImage configuration"""
 16 |     def __init__(self, id=id, image=None, command=None, host_config=None, default_params=None, server_registry_file=None):
 17 |         """
 18 |         Args:
 19 |             id: The short identifier for this runtime
 20 |             image: The full docker image name including a tag
 21 |             command: A list of commands to be passed to docker
 22 |             host_config: A dict that will be fed to docker.Client().create_host_config
 23 |             default_params: The default parameter values for this environment
 24 |             server_registry: A file containing a JSON dump of the server registry. The format will be runtime-specific.
 25 |         """
 26 |         self.id = id
 27 |         self.image = image
 28 |         self.command = command or []
 29 |         self.host_config = host_config or {}
 30 |         self.default_params = default_params or {}
 31 | 
 32 |         self._server_registry = None
 33 |         self._server_registry_file = server_registry_file
 34 | 
 35 |     @property
 36 |     def server_registry(self):
 37 |         if self._server_registry is None:
 38 |             with open(self._server_registry_file) as f:
 39 |                 self._server_registry = json.load(f)
 40 |         return self._server_registry
 41 | 
 42 |     @property
 43 |     def _cli_flags(self):
 44 |         # Not everything maps in a straightforward way, e.g. cap_add => '--cap-add' but ipc_mode => '--ipc
 45 |         api_to_cli = {
 46 |             'ipc_mode': 'ipc'
 47 |         }
 48 | 
 49 |         cli_flags = []
 50 |         for api_key, api_value in self.host_config.items():
 51 |             if isinstance(api_value, (six.string_types, bool)):
 52 |                 cli_values = [api_value]
 53 |             else:
 54 |                 cli_values = api_value
 55 | 
 56 |             for cli_value in cli_values:
 57 |                 if api_key in api_to_cli:
 58 |                     api_key = api_to_cli[api_key]
 59 |                 cli_flag = '--{}'.format(api_key.replace('_', '-'))
 60 |                 if isinstance(cli_value, bool):
 61 |                     # boolean flag, like --privileged
 62 |                     cli_flags += [cli_flag]
 63 |                 else:
 64 |                     cli_flags += [cli_flag, cli_value]
 65 | 
 66 |         return cli_flags
 67 | 
 68 |     def cli_command(self, vnc_port, rewarder_port, extra_flags=[]):
 69 |         return ['docker', 'run',
 70 |            '-p', '{}:5900'.format(vnc_port),
 71 |            '-p', '{}:15900'.format(rewarder_port)] + \
 72 |            extra_flags + \
 73 |            self._cli_flags + \
 74 |            [self.image] + self.command
 75 | 
 76 | 
 77 | class WindowsRuntime(object):
 78 |     # TODO: Spawn windows runtimes (right now managed manually)
 79 |     def __init__(self, id=id, default_params=None):
 80 |         """
 81 |         Args:
 82 |             id: The short identifier for this runtime
 83 |         """
 84 |         self.id = id
 85 |         self.default_params = default_params
 86 | 
 87 | 
 88 | class Registry(object):
 89 |     def __init__(self):
 90 |         self.runtimes = collections.OrderedDict()
 91 | 
 92 |     def register_runtime(self, id, kind, **kwargs):
 93 |         if kind == "docker":
 94 |             self.runtimes[id] = DockerRuntime(id, **kwargs)
 95 |         elif kind == "windows":
 96 |             self.runtimes[id] = WindowsRuntime(id, **kwargs)
 97 |         else:
 98 |             raise error.Error("No runtime of kind {} . \n Valid options are ['docker']".format(kind))
 99 | 
100 |     def runtime_spec(self, id):
101 |         """
102 |         id is a string describing the runtime, e.g 'flashgames
103 | 
104 |         Returns a configured DockerRuntime object
105 |         """
106 |         try:
107 |             return self.runtimes[id]
108 |         except KeyError:
109 |             raise UnregisteredRuntime('No registered runtime with name: {}'.format(id))
110 | 
111 | 
# Module-level registry instance. Its bound methods are re-exported below so
# they can be called as plain module-level functions.
registry = Registry()
register_runtime = registry.register_runtime
runtime_spec = registry.runtime_spec
115 | 


--------------------------------------------------------------------------------
/universe/utils/display.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import logging
  3 | 
  4 | import six
  5 | import numpy as np
  6 | 
  7 | logger = logging.getLogger(__name__)
  8 | 
  9 | # We log these with logger, which in py2 chokes on unicode
 10 | def fmt_plusminus(mean, dev):
 11 |     if six.PY3:
 12 |         return mean + '±' + dev
 13 |     else:
 14 |         # Logging unicode in py2 is asking for trouble
 15 |         return mean + '+-' + dev
 16 | 
 17 | def compute_timestamps_pair_max(time_m_2, flat=True):
 18 |     if flat:
 19 |         # Ignore empty inputs, which happens when environments are resetting.
 20 |         time_m_2 = [[x for x in time_m_2 if len(x)]]
 21 | 
 22 |     if len(time_m_2) == 0:
 23 |         return None, None
 24 | 
 25 |     # We concatenate the (min, max) lags from a variety of runs. Those
 26 |     # runs may have different lengths.
 27 |     time_m_2 = [np.array(m) for m in time_m_2]
 28 | 
 29 |     timestamp_m = []
 30 |     data_m = []
 31 |     for m in time_m_2:
 32 |         if len(m) > 0:
 33 |             timestamp, data = compute_timestamps_sigma(m[:, 1])
 34 |             timestamp_m.append(timestamp)
 35 |             data_m.append(data)
 36 |         else:
 37 |             timestamp_m.append(None)
 38 |             data_m.append({})
 39 |     return timestamp_m, data_m
 40 | 
 41 | def display_timestamps_pair_compact(time_m_2):
 42 |     """Takes a list of the following form: [(a1, b1), (a2, b2), ...] and
 43 |     returns a string a_mean-b_mean, flooring out at 0.
 44 |     """
 45 |     if len(time_m_2) == 0:
 46 |         return '(empty)'
 47 | 
 48 |     time_m_2 = np.array(time_m_2)
 49 | 
 50 |     low = time_m_2[:, 0].mean()
 51 |     high = time_m_2[:, 1].mean()
 52 | 
 53 |     low = max(low, 0)
 54 | 
 55 |     # Not sure if this'll always be true, and not worth crashing over
 56 |     if high < 0:
 57 |         logger.warn('Harmless warning: upper-bound on clock skew is negative: (%s, %s). Please let Greg know about this.', low, high)
 58 | 
 59 |     return '{}-{}'.format(display_timestamp(low), display_timestamp(high))
 60 | 
 61 | def display_timestamps_pair(time_m_2):
 62 |     """Takes a list of the following form: [(a1, b1), (a2, b2), ...] and
 63 |     returns a string (a_mean+/-a_error, b_mean+/-b_error).
 64 |     """
 65 |     if len(time_m_2) == 0:
 66 |         return '(empty)'
 67 | 
 68 |     time_m_2 = np.array(time_m_2)
 69 |     return '({}, {})'.format(
 70 |         display_timestamps(time_m_2[:, 0]),
 71 |         display_timestamps(time_m_2[:, 1]),
 72 |     )
 73 | 
 74 | def compute_timestamps_sigma_n(time_m):
 75 |     timestamp_m = []
 76 |     data_m = []
 77 | 
 78 |     for t in time_m:
 79 |         timestamp, data = compute_timestamps(t)
 80 |         timestamp_m.append(timestamp)
 81 |         data_m.append(data)
 82 | 
 83 |     return timestamp_m, data_m
 84 | 
 85 | def compute_timestamps_sigma(time_m):
 86 |     if len(time_m) == 0:
 87 |         return None, {}
 88 | 
 89 |     mean = np.mean(time_m)
 90 |     std = standard_error(time_m)
 91 |     scale, units = pick_time_units(mean)
 92 |     return fmt_plusminus('{:.2f}{}'.format(mean * scale, units), '{:.2f}{}'.format(std * scale, units)), {'mean': mean}
 93 | 
 94 | def display_timestamps(time_m):
 95 |     res, _ = compute_timestamps(time_m)
 96 |     if res is None:
 97 |         return '(empty)'
 98 |     else:
 99 |         return res
100 | 
def compute_timestamps(time_m):
    """Format mean and standard error of ``time_m``, each in its own time unit.

    Returns ``(None, {})`` for an empty input, otherwise the formatted
    "mean plus/minus error" string and ``{'mean': mean}``.
    """
    if len(time_m) == 0:
        return None, {}

    mean = np.mean(time_m)
    error = standard_error(time_m)
    mean_text = display_timestamp(mean)
    error_text = display_timestamp(error)
    return fmt_plusminus(mean_text, error_text), {'mean': mean}
108 | 
def display_timestamps_n(time_m):
    """Concatenate all the sequences in ``time_m`` and format the result
    with display_timestamps.
    """
    merged = np.concatenate(time_m)
    return display_timestamps(merged)
112 | 
def standard_error(ary, axis=0):
    """Return the standard error of the mean of ``ary`` along ``axis``.

    Computed as the population standard deviation divided by sqrt(n - 1),
    where n is the number of samples along ``axis`` (or the total element
    count when ``axis`` is None). With at most one sample the plain
    standard deviation is returned, avoiding a division by zero.
    """
    values = np.asarray(ary)
    # The sample count must follow the reduced axis, not always axis 0.
    count = values.size if axis is None else values.shape[axis]
    spread = np.std(values, axis=axis)
    if count > 1:
        return spread / np.sqrt(count - 1)
    return spread
118 | 
def display_timestamp(time):
    """Format a scalar ``time`` with two decimals in the unit chosen by
    pick_time_units. Arrays are rejected by the assertion.
    """
    assert not isinstance(time, np.ndarray), 'Invalid scalar: {}'.format(time)
    factor, suffix = pick_time_units(time)
    scaled = time * factor
    return '{:.2f}{}'.format(scaled, suffix)
123 | 
def pick_time_units(time):
    """Return the ``(scale, unit)`` used to display a scalar ``time``.

    Magnitudes below 1 give ``(1000, 'ms')``; everything else gives
    ``(1, 's')``.
    """
    assert not isinstance(time, np.ndarray), 'Invalid scalar: {}'.format(time)
    return (1000, 'ms') if abs(time) < 1 else (1, 's')
130 | 


--------------------------------------------------------------------------------
/tests/functional/test_core_envs_semantics.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import pytest
  3 | 
  4 | import gym
  5 | import numpy as np
  6 | from PIL import Image
  7 | from gym import spaces
  8 | from universe import wrappers
  9 | from universe.envs.vnc_core_env import translator
 10 | 
 11 | def show(obs):
 12 |     Image.fromarray(obs).show()
 13 | 
 14 | class AtariMatcher(object):
 15 |     def translator(self, env):
 16 |         return translator.AtariTranslator(env)
 17 | 
 18 |     def crop(self, obs):
 19 |         return obs[20:210, :160, :]
 20 | 
 21 |     def assert_match(self, obs, vnc_obs, extra_info=None, stage=None):
 22 |         # Crop out the mouse
 23 |         vnc_obs_cropped = self.crop(vnc_obs)
 24 |         obs_cropped = self.crop(obs)
 25 | 
 26 |         if not np.all(vnc_obs_cropped == obs_cropped):
 27 |             show(vnc_obs_cropped)
 28 |             show(obs_cropped)
 29 |             show(vnc_obs_cropped - obs_cropped)
 30 |             assert False, '[{}] Observations do not match: vnc_obs_cropped={} obs_cropped={} extra_info={}'.format(stage, vnc_obs_cropped, obs_cropped, extra_info)
 31 | 
 32 | # Wraps an Atari-over-VNC env so that it behaves like a vectorized vanilla Atari env
 33 | def atari_vnc_wrapper(env):
 34 |     env = wrappers.Vision(env)
 35 |     env = wrappers.GymCoreAction(env)
 36 |     return env
 37 | 
 38 | class CartPoleLowDMatcher(object):
 39 |     def translator(self, env):
 40 |         return translator.CartPoleTranslator(env)
 41 | 
 42 |     def assert_match(self, obs, vnc_obs, extra_info=None, stage=None):
 43 |         assert np.all(np.isclose(obs, vnc_obs)), '[{}] Observations do not match: vnc_obs={} obs={}'.format(stage, vnc_obs, obs)
 44 | 
 45 | def reset(matcher, env, vnc_env, stage=None):
 46 |     obs = env.reset()
 47 |     vnc_obs = vnc_env.reset()
 48 |     matcher.assert_match(obs, vnc_obs, stage=stage)
 49 | 
 50 | def rollout(matcher, env, vnc_env, timestep_limit=None, stage=None):
 51 |     count = 0
 52 |     actions = matcher.translator(env)
 53 | 
 54 |     done = None
 55 |     while True:
 56 |         action = env.action_space.sample()
 57 | 
 58 |         obs, reward, done, info = env.step(action)
 59 |         if done:
 60 |             # Account for remote auto-reset
 61 |             obs = env.reset()
 62 | 
 63 |         vnc_obs, vnc_reward, vnc_done, vnc_info = vnc_env.step(action)
 64 |         assert reward == vnc_reward
 65 |         assert done == vnc_done
 66 |         assert vnc_info['stats.reward.count'] == 1
 67 |         matcher.assert_match(obs, vnc_obs, {'reward': reward, 'done': done}, stage=stage)
 68 | 
 69 |         count += 1
 70 |         if done or (timestep_limit is not None and count >= timestep_limit):
 71 |             break
 72 | 
# TODO: we should have auto-env spinup
# Each entry is a (spec, matcher, wrapper) tuple; the parametrized test
# below unpacks them under exactly those three names.
specs = [
    (gym.spec('gym-core.PongDeterministicSync-v3'), AtariMatcher(), atari_vnc_wrapper),
    (gym.spec('gym-core.PitfallDeterministicSync-v3'), AtariMatcher(), atari_vnc_wrapper),

    # This test is still broken. Looks like we're not piping the seed
    # to the CartPole env behind VNC
    # NOTE(review): the disabled entry below has only two elements, so it
    # would also need a wrapper before it could be re-enabled.
#    (gym.spec('gym-core.CartPoleLowDSync-v0'), CartPoleLowDMatcher())
]
 82 | 
@pytest.mark.parametrize("spec,matcher,wrapper", specs)
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    """Run a VNC-backed env and its local gym counterpart in lockstep.

    Both envs get the same seed. ``matcher`` then checks that observations,
    rewards and done flags agree after the initial reset, during a 50-step
    rollout, after a second reset and after reseeding. The statement order
    matters: seeding and resets on both sides must stay interleaved exactly
    as written.
    """
    # Check that when running over VNC or using the raw environment,
    # semantics match exactly.
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    # Seed the action-space sampling so both envs see the same actions.
    spaces.seed(0)

    vnc_env = spec.make()
    if vnc_env.metadata.get('configure.required', False):
        vnc_env.configure(remotes=1)
    vnc_env = wrapper(vnc_env)
    vnc_env = wrappers.Unvectorize(vnc_env)

    # The local env is built from the 'gym_core_id' stored in the spec kwargs.
    env = gym.make(spec._kwargs['gym_core_id'])

    env.seed(0)
    vnc_env.seed(0)

    # Check that reset observations work
    reset(matcher, env, vnc_env, stage='initial reset')

    # Check a full rollout
    rollout(matcher, env, vnc_env, timestep_limit=50, stage='50 steps')

    # Reset to start a new episode
    reset(matcher, env, vnc_env, stage='reset to new episode')

    # Check that a step into the next episode works
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='1 step in new episode')

    # Make sure env can be reseeded
    env.seed(1)
    vnc_env.seed(1)
    reset(matcher, env, vnc_env, 'reseeded reset')
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='reseeded step')
120 | 


--------------------------------------------------------------------------------
/universe/vncdriver/libvnc_session.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | 
  4 | from twisted.internet import defer, endpoints
  5 | 
  6 | from universe import error, utils
  7 | from universe.twisty import reactor
  8 | from universe.vncdriver import screen, vnc_client
  9 | 
 10 | PYGAME_INSTALLED = None
 11 | def load_pygame():
 12 |     global PYGAME_INSTALLED, pygame
 13 |     if PYGAME_INSTALLED is not None:
 14 |         return
 15 | 
 16 |     try:
 17 |         import pygame
 18 |         PYGAME_INSTALLED = True
 19 |     except ImportError:
 20 |         PYGAME_INSTALLED = False
 21 | 
 22 | logger = logging.getLogger(__name__)
 23 | 
 24 | 
 25 | class LibVNCSession(object):
 26 |     def __init__(self, remotes, error_buffer, encoding=None, compress_level=None, fine_quality_level=None, subsample_level=None):
 27 |         """compress_level: 0-9 [9 is highest compression]
 28 |         fine_quality_level: 0-100 [100 is best quality]
 29 |         subsample_level: 0-3 [0 is best quality]
 30 | 
 31 |         Lots of references for this, but
 32 |         https://github.com/TurboVNC/turbovnc/blob/master/doc/performance.txt
 33 |         is decent.
 34 |         """
 35 | 
 36 |         load_pygame()
 37 |         import libvncdriver
 38 | 
 39 |         if encoding is None:
 40 |             encoding = os.environ.get('LIBVNC_ENCODING', 'tight')
 41 |         if compress_level is None:
 42 |             compress_level = int(os.environ.get('LIBVNC_COMPRESS_LEVEL', '0'))
 43 |         if fine_quality_level is None:
 44 |             fine_quality_level = int(os.environ.get('LIBVNC_FINE_QUALITY_LEVEL', '100'))
 45 |         if subsample_level is None:
 46 |             subsample_level = int(os.environ.get('LIBVNC_SUBSAMPLE_LEVEL', '0'))
 47 | 
 48 |         if not hasattr(libvncdriver, 'VNCSession'):
 49 |             raise error.Error('''
 50 |  *=================================================*
 51 | || libvncdriver is not installed                   ||
 52 | || Try installing with "pip install libvncdriver"  ||
 53 | || or use the go or python driver by setting       ||
 54 | || UNIVERSE_VNCDRIVER=go                                ||
 55 | || UNIVERSE_VNCDRIVER=py                                ||
 56 |  *=================================================*''')
 57 |         logger.info("Using libvncdriver's %s encoding" % encoding)
 58 |         self.driver = libvncdriver.VNCSession(
 59 |             remotes=remotes,
 60 |             error_buffer=error_buffer,
 61 |             encoding=encoding,
 62 |             compress_level=compress_level,
 63 |             fine_quality_level=fine_quality_level,
 64 |             subsample_level=subsample_level,
 65 |         )
 66 |         self.screen = None
 67 |         self.render_called_once = False
 68 |         if PYGAME_INSTALLED:
 69 |             pygame.init()
 70 | 
 71 |     def flip(self):
 72 |         return self._guard(self.driver.flip)
 73 | 
 74 |     def step(self, action):
 75 |         return self.driver.step(action)
 76 | 
 77 |     def render(self):
 78 |         self._guard(self._render)
 79 | 
 80 |     def _guard(self, fn):
 81 |         try:
 82 |             return fn()
 83 |         except (KeyboardInterrupt, SystemExit):
 84 |             self.close()
 85 | 
 86 |     def _render(self):
 87 |         self.before_render()
 88 |         if not PYGAME_INSTALLED:
 89 |             return
 90 |         # For some reason pygame wants X and Y swapped
 91 |         aray, n = self.driver.flip()
 92 |         if self.screen is None:
 93 |             self.screen = pygame.display.set_mode(aray[0].shape[:2][::-1])
 94 |         surf = pygame.surfarray.make_surface(aray[0].swapaxes(0, 1))
 95 |         rect = surf.get_rect()
 96 |         self.screen.blit(surf, rect)
 97 |         pygame.display.flip()
 98 | 
 99 |         for event in pygame.event.get():
100 |             if event.type == pygame.QUIT:
101 |                 self.close()
102 | 
103 |     def before_render(self):
104 |         if not self.render_called_once:
105 |             self.render_called_once = True
106 |             if not PYGAME_INSTALLED:
107 |                 logger.warn('''
108 |  *================================================================*
109 | ||                                                                ||
110 | || Rendering disabled when using libvnc without pygame installed. ||
111 | || Consider viewing over VNC or running "pip install pygame".     ||
112 | ||                                                                ||
113 |  *================================================================*''')
114 | 
115 | 
116 |     def close(self):
117 |         if PYGAME_INSTALLED:
118 |             pygame.quit()
119 |         self.driver.close()
120 | 


--------------------------------------------------------------------------------
/universe/envs/vnc_core_env/translator.py:
--------------------------------------------------------------------------------
  1 | from universe import spaces
  2 | from universe.envs.vnc_core_env import key
  3 | import logging
  4 | 
  5 | logger = logging.getLogger(__name__)
  6 | 
  7 | 
  8 | class AtariKeyState(object):
  9 |     """
 10 |     Converts from VNCEvents to an Atari-v0 action index
 11 | 
 12 |     Since spaces.KeyEvent only give you a diff of a keyboard, we need to persist the total state of the keyboard to
 13 |     convert from VNCEvents to an action index
 14 |     """
 15 |     def __init__(self, env):
 16 |         self._translator = AtariTranslator(env)
 17 |         self._down_keysyms = set()  # Assumes that your env starts with no keys pressed down
 18 | 
 19 |     def apply_vnc_actions(self, vnc_actions):
 20 |         """
 21 |         Play a list of vnc_actions forward over the current keysyms state
 22 | 
 23 |         NOTE: Since we are squashing a set of diffs into a single keyboard state, some information may be lost.
 24 |         For example if the Z key is down, then we receive [(Z-up), (Z-down)], the output will not reflect any change in Z
 25 |         You can make each frame shorter to offset this effect.
 26 |         """
 27 |         for event in vnc_actions:
 28 |             if isinstance(event, spaces.KeyEvent):
 29 |                 if event.down:
 30 |                     self._down_keysyms.add(event.key)
 31 |                 else:
 32 |                     self._down_keysyms.discard(event.key)
 33 | 
 34 |         logger.debug("AtariKeyState._down_keysyms: {}".format(self._down_keysyms))
 35 | 
 36 |     def to_keysyms(self):
 37 |         """Returns the current state as keysyms"""
 38 |         return list(self._down_keysyms)
 39 | 
 40 |     def to_index(self):
 41 |         """Returns the current state as an index"""
 42 |         return self._translator.keysyms_to_index(self.to_keysyms())
 43 | 
 44 | 
 45 | class AtariTranslator(object):
 46 |     """Translates Atari actions to and from various formats"""
 47 |     _all_keysyms = [key.UP, key.DOWN, key.LEFT, key.RIGHT, key.Z]
 48 | 
 49 |     def __init__(self, env):
 50 |         # e.g. {0: 'NOOP', 1: 'FIRE', 2: 'RIGHT', 3: 'LEFT', 4: 'RIGHTFIRE', 5: 'LEFTFIRE'}
 51 |         self._index_to_name_ = {}
 52 |         # e.g. {'RIGHT': 2, 'FIRE': 1, 'RIGHTFIRE': 4, 'LEFTFIRE': 5, 'NOOP': 0, 'LEFT': 3}
 53 |         self._name_to_index_ = {}
 54 | 
 55 |         for i, meaning in enumerate(env.unwrapped.get_action_meanings()):
 56 |             self._name_to_index_[meaning] = i
 57 |             self._index_to_name_[i] = meaning
 58 | 
 59 |     def keysyms_to_vnc_actions(self, keysyms):
 60 |         actions = []
 61 |         keysyms = set(keysyms)
 62 |         for keysym in self._all_keysyms:
 63 |             down = keysym in keysyms
 64 |             actions.append(spaces.KeyEvent(keysym, down=down))
 65 |         return actions
 66 | 
 67 |     def keysyms_to_index(self, keysyms):
 68 |         name = self._keysyms_to_name(keysyms)
 69 |         return self._name_to_index(name)
 70 | 
 71 |     def index_to_keysyms(self, i):
 72 |         name = self._index_to_name(i)
 73 |         keysyms = []
 74 |         if 'UP' in name:
 75 |             keysyms.append(key.UP)
 76 |         if 'DOWN' in name:
 77 |             keysyms.append(key.DOWN)
 78 |         if 'LEFT' in name:
 79 |             keysyms.append(key.LEFT)
 80 |         if 'RIGHT' in name:
 81 |             keysyms.append(key.RIGHT)
 82 |         if 'FIRE' in name:
 83 |             keysyms.append(key.Z)
 84 |         return keysyms
 85 | 
 86 |     def _name_to_index(self, name):
 87 |         return self._name_to_index_.get(name, 0)
 88 | 
 89 |     def _index_to_name(self, i):
 90 |         return self._index_to_name_[i]
 91 | 
 92 |     def _keysyms_to_name(self, keysyms):
 93 |         keys = ''
 94 |         if key.UP in keysyms:
 95 |             keys += 'UP'
 96 |         if key.DOWN in keysyms:
 97 |             keys += 'DOWN'
 98 |         if key.LEFT in keysyms:
 99 |             keys += 'LEFT'
100 |         if key.RIGHT in keysyms:
101 |             keys += 'RIGHT'
102 |         if key.Z in keysyms:
103 |             keys += 'FIRE'
104 |         return keys
105 | 
class CartPoleTranslator(object):
    """Translates CartPole actions between keysyms, VNC events and indices.

    Action 0 corresponds to the LEFT key being held; any other action
    corresponds to no key.
    """
    def __init__(self, env):
        # ``env`` is accepted but not used.
        pass

    def keysyms_to_vnc_actions(self, keysyms):
        """Return a single KeyEvent for LEFT, down iff LEFT is in ``keysyms``."""
        return [spaces.KeyEvent(key.LEFT, down=key.LEFT in keysyms)]

    def keysyms_to_index(self, keys):
        """Return 0 when LEFT is among ``keys``, otherwise 1."""
        return 0 if key.LEFT in keys else 1

    def index_to_keysyms(self, i):
        """Return ``[key.LEFT]`` for action 0 and an empty list otherwise."""
        return [key.LEFT] if i == 0 else []
125 | 


--------------------------------------------------------------------------------
/universe/vncdriver/constants.py:
--------------------------------------------------------------------------------
# Encodings
# Integer identifiers for framebuffer encodings.
# NOTE(review): presumably the encoding-type numbers of the RFB (VNC)
# protocol -- confirm against the protocol specification.
RAW_ENCODING =                  0
COPY_RECTANGLE_ENCODING =       1
RRE_ENCODING =                  2
CORRE_ENCODING =                4
HEXTILE_ENCODING =              5
ZLIB_ENCODING =                 6
TIGHT_ENCODING =                7
ZLIBHEX_ENCODING =              8
ZRLE_ENCODING =                 16
# 0xffffff00 to 0xffffffff: tight options (none are defined in this module)
PSEUDO_CURSOR_ENCODING =        -239
 13 | 
# Keycodes
# Numeric key codes, referenced by name from KEYMAP below.
# NOTE(review): the values look like X11 keysyms (0xffXX for function and
# modifier keys, plain ASCII for printable ones) -- confirm against the
# keysym definitions before adding new entries.

# Editing and navigation keys
KEY_BackSpace = 0xff08
KEY_Tab =       0xff09
KEY_Return =    0xff0d
KEY_Escape =    0xff1b
KEY_Insert =    0xff63
KEY_Delete =    0xffff
KEY_Home =      0xff50
KEY_End =       0xff57
KEY_PageUp =    0xff55
KEY_PageDown =  0xff56
KEY_Left =      0xff51
KEY_Up =        0xff52
KEY_Right =     0xff53
KEY_Down =      0xff54
# Function keys
KEY_F1 =        0xffbe
KEY_F2 =        0xffbf
KEY_F3 =        0xffc0
KEY_F4 =        0xffc1
KEY_F5 =        0xffc2
KEY_F6 =        0xffc3
KEY_F7 =        0xffc4
KEY_F8 =        0xffc5
KEY_F9 =        0xffc6
KEY_F10 =       0xffc7
KEY_F11 =       0xffc8
KEY_F12 =       0xffc9
KEY_F13 =       0xFFCA
KEY_F14 =       0xFFCB
KEY_F15 =       0xFFCC
KEY_F16 =       0xFFCD
KEY_F17 =       0xFFCE
KEY_F18 =       0xFFCF
KEY_F19 =       0xFFD0
KEY_F20 =       0xFFD1
# Modifier keys
KEY_ShiftLeft = 0xffe1
KEY_ShiftRight = 0xffe2
KEY_ControlLeft = 0xffe3
KEY_ControlRight = 0xffe4
KEY_MetaLeft =  0xffe7
KEY_MetaRight = 0xffe8
KEY_AltLeft =   0xffe9
KEY_AltRight =  0xffea

# Lock keys and additional modifiers
KEY_Scroll_Lock = 0xFF14
KEY_Sys_Req =   0xFF15
KEY_Num_Lock =  0xFF7F
KEY_Caps_Lock = 0xFFE5
KEY_Pause =     0xFF13
KEY_Super_L =   0xFFEB
KEY_Super_R =   0xFFEC
KEY_Hyper_L =   0xFFED
KEY_Hyper_R =   0xFFEE

# Numeric keypad
KEY_KP_0 =      0xFFB0
KEY_KP_1 =      0xFFB1
KEY_KP_2 =      0xFFB2
KEY_KP_3 =      0xFFB3
KEY_KP_4 =      0xFFB4
KEY_KP_5 =      0xFFB5
KEY_KP_6 =      0xFFB6
KEY_KP_7 =      0xFFB7
KEY_KP_8 =      0xFFB8
KEY_KP_9 =      0xFFB9
KEY_KP_Enter =  0xFF8D

# Printable characters (these values equal the characters' ASCII codes)
KEY_ForwardSlash = 0x002F
KEY_BackSlash = 0x005C
KEY_SpaceBar=   0x0020
 83 | 
# Lookup table from short, human-friendly key names to the KEY_* codes
# defined above. Several names are aliases for the same code (for example
# 'return'/'enter', 'delete'/'del', and the unprefixed modifier names, which
# resolve to the left-hand variant).
# TODO: build this programmatically?
KEYMAP = {
    'bsp': KEY_BackSpace,
    'tab': KEY_Tab,
    'return': KEY_Return,
    'enter': KEY_Return,
    'esc': KEY_Escape,
    'ins': KEY_Insert,
    'delete': KEY_Delete,
    'del': KEY_Delete,
    'home': KEY_Home,
    'end': KEY_End,
    'pgup': KEY_PageUp,
    'pgdn': KEY_PageDown,
    'ArrowLeft': KEY_Left,
    'left': KEY_Left,
    'ArrowUp': KEY_Up,
    'up': KEY_Up,
    'ArrowRight': KEY_Right,
    'right': KEY_Right,
    'ArrowDown': KEY_Down,
    'down': KEY_Down,

    # NOTE(review): 'slash' resolves to the *backslash* code, the same as
    # 'bslash'; only 'fslash' yields the forward slash. Confirm this is
    # intended before relying on 'slash'.
    'slash': KEY_BackSlash,
    'bslash': KEY_BackSlash,
    'fslash': KEY_ForwardSlash,
    'spacebar': KEY_SpaceBar,
    'space': KEY_SpaceBar,
    'sb': KEY_SpaceBar,

    'f1': KEY_F1,
    'f2': KEY_F2,
    'f3': KEY_F3,
    'f4': KEY_F4,
    'f5': KEY_F5,
    'f6': KEY_F6,
    'f7': KEY_F7,
    'f8': KEY_F8,
    'f9': KEY_F9,
    'f10': KEY_F10,
    'f11': KEY_F11,
    'f12': KEY_F12,
    'f13': KEY_F13,
    'f14': KEY_F14,
    'f15': KEY_F15,
    'f16': KEY_F16,
    'f17': KEY_F17,
    'f18': KEY_F18,
    'f19': KEY_F19,
    'f20': KEY_F20,

    # Modifier and lock keys; the bare name is an alias for the left key.
    'lshift': KEY_ShiftLeft,
    'shift': KEY_ShiftLeft,
    'rshift': KEY_ShiftRight,
    'lctrl': KEY_ControlLeft,
    'ctrl': KEY_ControlLeft,
    'rctrl': KEY_ControlRight,
    'lmeta': KEY_MetaLeft,
    'meta': KEY_MetaLeft,
    'rmeta': KEY_MetaRight,
    'lalt': KEY_AltLeft,
    'alt': KEY_AltLeft,
    'ralt': KEY_AltRight,
    'scrlk': KEY_Scroll_Lock,
    'sysrq': KEY_Sys_Req,
    'numlk': KEY_Num_Lock,
    'caplk': KEY_Caps_Lock,
    'pause': KEY_Pause,
    'lsuper': KEY_Super_L,
    'super': KEY_Super_L,
    'rsuper': KEY_Super_R,
    'lhyper': KEY_Hyper_L,
    'hyper': KEY_Hyper_L,
    'rhyper': KEY_Hyper_R,

    # Numeric keypad.
    'kp0': KEY_KP_0,
    'kp1': KEY_KP_1,
    'kp2': KEY_KP_2,
    'kp3': KEY_KP_3,
    'kp4': KEY_KP_4,
    'kp5': KEY_KP_5,
    'kp6': KEY_KP_6,
    'kp7': KEY_KP_7,
    'kp8': KEY_KP_8,
    'kp9': KEY_KP_9,
    'kpenter': KEY_KP_Enter,
}
171 | 


--------------------------------------------------------------------------------
/universe/kube/discovery.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import logging
  3 | import pipes
  4 | import subprocess
  5 | 
class Error(Exception):
    """Exception raised by this module, e.g. for a failed command or an unknown batch."""
    pass
  8 | 
  9 | logger = logging.getLogger()
 10 | 
 11 | def pretty_command(command):
 12 |     return ' '.join(pipes.quote(c) for c in command)
 13 | 
 14 | def log_command(command, prefix=''):
 15 |     logger.info('%sExecuting: %s', prefix, pretty_command(command))
 16 | 
 17 | def check_call(command, *args, **kwargs):
 18 |     log_command(command)
 19 |     return subprocess.check_call(command, *args, **kwargs)
 20 | 
 21 | def popen(command, *args, **kwargs):
 22 |     log_command(command)
 23 |     return subprocess.Popen(command, *args, **kwargs)
 24 | 
 25 | def check_with_output(command, *args, **kwargs):
 26 |     log_command(command)
 27 |     proc = subprocess.Popen(command, *args, stdout=subprocess.PIPE, **kwargs)
 28 |     stdout, _ = proc.communicate()
 29 |     if proc.returncode != 0:
 30 |         raise Error('Command {} returned non-zero exit status {}'.format(command, proc.returncode))
 31 |     return stdout
 32 | 
 33 | def interpret_ready(pod):
 34 |     # status:
 35 |     # conditions:
 36 |     # - lastProbeTime: null
 37 |     #   lastTransitionTime: 2016-07-06T05:29:45Z
 38 |     #   message: 'containers with unready status: [xdummy xvnc vnc-atari]'
 39 |     #   reason: ContainersNotReady
 40 |     #   status: "False"
 41 |     #   type: Ready
 42 |     if 'conditions' not in pod['status']:
 43 |         return False
 44 | 
 45 |     ready = [c for c in pod['status']['conditions'] if c['type'] == 'Ready']
 46 |     if not ready:
 47 |         return False
 48 | 
 49 |     return ready[0]['status'] == 'True'
 50 | 
 51 | def interpret_ports(containers):
 52 |     # TODO: clean up hack
 53 |     try:
 54 |         recorder = containers['vnc-recorder']
 55 |     except KeyError:
 56 |         pass
 57 |     else:
 58 |         spec = recorder['ports'][0]
 59 |         assert spec['containerPort'] == 5899
 60 |         return spec['hostPort'], None
 61 | 
 62 |     app = [k for k in containers.keys() if k.startswith('vnc-')]
 63 |     assert len(app) == 1
 64 |     app = app[0]
 65 | 
 66 |     port_mapping = {}
 67 |     for spec in containers[app]['ports']:
 68 |         port_mapping[spec['containerPort']] = spec['hostPort']
 69 |     # vnc, rewarder
 70 |     return port_mapping[5900], port_mapping.get(15900)
 71 | 
 72 | class VNCEnvDiscovery(object):
 73 |     def __init__(self):
 74 |         self.context = 'sci'
 75 |         self.namespace = 'gym'
 76 |         self.kubectl = ['kubectl', '--context', self.context, '--namespace', self.namespace]
 77 | 
 78 |     def discover_batches(self):
 79 |         pods = check_with_output(self.kubectl + ['get', 'pods', '-o', 'json', '-l', 'type=universe'])
 80 |         pods = json.loads(pods)
 81 | 
 82 |         batches = {}
 83 |         for pod in pods['items']:
 84 |             if 'deletionTimestamp' in pod['metadata']:
 85 |                 # Pod has been deleted!
 86 |                 continue
 87 | 
 88 |             batch = pod['metadata']['labels']['batch']
 89 |             if batch not in batches:
 90 |                 batches[batch] = {'count': 0}
 91 |             batches[batch]['count'] += 1
 92 |         return batches
 93 | 
 94 |     def discover(self, batch, force_ready=False):
 95 |         pods = check_with_output(self.kubectl + ['get', 'pods', '-o', 'json', '-l', 'type=universe', '-l', 'batch={}'.format(batch)])
 96 |         pods = json.loads(pods)
 97 | 
 98 |         if len(pods['items']) == 0:
 99 |             raise Error('Incorrect batch id: {}'.format(batch))
100 | 
101 |         remotes = []
102 | 
103 |         for pod in pods['items']:
104 |             name = pod['metadata']['name']
105 |             containers = {}
106 |             for container in pod['spec']['containers']:
107 |                 containers[container['name']] = container
108 |             vnc_port, rewarder_port = interpret_ports(containers)
109 |             node = pod['spec'].get('nodeName')
110 | 
111 |             # Not scheduled on a node yet
112 |             if node is None:
113 |                 if force_ready:
114 |                     raise Error('Not all pods ready: {} is not scheduled on a node yet'.format(name))
115 |                 continue
116 | 
117 |             address = '{}:{}'.format(node, vnc_port)
118 |             if rewarder_port is not None:
119 |                 address += '+{}'.format(rewarder_port)
120 |             spec = {
121 |                 'name': name,
122 |                 'address': address,
123 |                 'ready': interpret_ready(pod),
124 |             }
125 |             remotes.append(spec)
126 |         return remotes
127 | 
# Module-level default instance, with its bound methods re-exported so callers
# can use them as plain functions of this module.
vnc_env_discovery = VNCEnvDiscovery()
discover = vnc_env_discovery.discover
discover_batches = vnc_env_discovery.discover_batches
131 | 


--------------------------------------------------------------------------------
/universe/envs/vnc_core_env/vnc_core_env.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import time
  3 | 
  4 | import gym
  5 | from universe import spaces
  6 | from universe.envs import vnc_env
  7 | 
  8 | logger = logging.getLogger(__name__)
  9 | 
 10 | class GymCoreEnv(vnc_env.VNCEnv):
 11 |     def __init__(self, gym_core_id, fps=60):
 12 |         super(GymCoreEnv, self).__init__()
 13 | 
 14 |         self.metadata = dict(self.metadata)
 15 |         self.metadata['video.frames_per_second'] = fps
 16 | 
 17 |         self.gym_core_id = gym_core_id
 18 |         self._seed_value = None
 19 | 
 20 |         self.vnc_pixels = True
 21 | 
 22 | class GymCoreSyncEnv(GymCoreEnv):
 23 |     """A synchronized version of the core envs. Its semantics should match
 24 |     that of the core envs. (By default, observations are pixels from
 25 |     the VNC session, but it also supports receiving the normal Gym
 26 |     observations over the rewarder socket.)
 27 | 
 28 |     Provided primarily for testing and debugging.
 29 |     """
 30 | 
 31 |     def __init__(self, gym_core_id, fps=60, vnc_pixels=True):
 32 |         super(GymCoreSyncEnv, self).__init__(gym_core_id, fps=fps)
 33 |         # Metadata has already been cloned
 34 |         self.metadata['semantics.async'] = False
 35 | 
 36 |         self.gym_core_id = gym_core_id
 37 |         self.vnc_pixels = vnc_pixels
 38 | 
 39 |         if not vnc_pixels:
 40 |             self._core_env = gym.spec(gym_core_id).make()
 41 |         else:
 42 |             self._core_env = None
 43 | 
 44 |     def _flip_past(self, when_n):
 45 |         info_n = [{} for i in range(self.n)]
 46 |         while True:
 47 |             observation_n, obs_info_n = self.vnc_session.flip()
 48 |             metadata_n = self.diagnostics.extract_metadata(observation_n)
 49 | 
 50 |             # Save the update count
 51 |             self._propagate_obs_info(info_n, obs_info_n)
 52 | 
 53 |             # All remote times, so no clock skew adjustments needed
 54 |             invalid = []
 55 |             for i, (metadata, when) in enumerate(zip(metadata_n, when_n)):
 56 |                 delta = when - metadata.get('now', 0)
 57 |                 if delta > 0:
 58 |                     invalid.append((i, delta))
 59 |             if not invalid:
 60 |                 break
 61 |             else:
 62 |                 tick = 1./self.metadata['video.frames_per_second']
 63 |                 logger.info('Waiting %sms for the following observations to catch up: %s', int(1000*tick), invalid)
 64 |                 time.sleep(tick)
 65 |         return observation_n, info_n
 66 | 
 67 |     def _reset(self):
 68 |         assert self.rewarder_session
 69 | 
 70 |         result = self.rewarder_session.reset(
 71 |             seed=self._seed_value,
 72 |         )
 73 |         # Clear seed value so we don't double-send it
 74 |         self._seed_value = None
 75 | 
 76 |         # Wait until all the observations have passed the reset_time
 77 |         remote_reset_time = [response['headers']['sent_at'] for _, _, response in result]
 78 |         observation_n, _ = self._flip_past(remote_reset_time)
 79 | 
 80 |         # Double check that our reward queue is empty
 81 |         assert all(c == 0 for c in self.rewarder_session.rewards_count())
 82 | 
 83 |         return self._observation(observation_n)
 84 | 
 85 |     def _observation(self, observation_n):
 86 |         if self.vnc_pixels:
 87 |             return observation_n
 88 |         else:
 89 |             observation_n = self.rewarder_session.pop_observation()
 90 |             assert all(observation is not None for observation in observation_n), 'At least one missing observation: {}'.format(observation_n)
 91 |             return self._core_env.observation_space.from_jsonable(observation_n)
 92 | 
 93 |     def _step(self, action_n):
 94 |         # Add C keypress in order to "commit" the action, as
 95 |         # interpreted by the remote.
 96 |         action_n = [action + [
 97 |             spaces.KeyEvent.by_name('c', down=True),
 98 |             spaces.KeyEvent.by_name('c', down=False)
 99 |         ] for action in action_n]
100 |         # Submit directly to VNC session, without popping rewards
101 |         logger.debug('Submitting actions: %s', action_n)
102 |         action_n = self._compile_actions(action_n)
103 |         _, obs_info_n = self.vnc_session.step(action_n)
104 |         # Wait until the actions have actually happened
105 |         self.rewarder_session.wait(timeout=5)
106 | 
107 |         # TODO: this is now present in the info messages; need to
108 |         # update the implementation.
109 |         when_n = [reward_buffer.info['reward_buffer.remote_time'] for reward_buffer in self.rewarder_session.reward_buffers]
110 |         observation_n, obs_info_n = self._flip_past(when_n)
111 | 
112 |         reward_n, reward_time_n, done_n, info_n = self.rewarder_session.pop()
113 |         self._propagate_obs_info(info_n, obs_info_n)
114 | 
115 |         # Warn if we detect multiple rewards
116 |         if any(info['stats.reward.count'] != 1 for info in info_n):
117 |             # Arrived but there was a bug
118 |             logger.warn('Likely bug: should have received 1 reward for every env, but instead received %s. Current return: observation=%s reward=%s done=%s info=%s', [info['stats.reward.count'] for info in info_n], observation_n, reward_n, done_n, info_n)
119 | 
120 |         return self._observation(observation_n), reward_n, done_n, {'n': info_n}
121 | 


--------------------------------------------------------------------------------
/universe/remotes/hardcoded_addresses.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | import re
  4 | import six.moves.urllib.parse as urlparse
  5 | 
  6 | from universe import error, utils
  7 | from universe.remotes import remote
  8 | 
  9 | logger = logging.getLogger(__name__)
 10 | 
 11 | class HardcodedAddresses(object):
 12 |     @classmethod
 13 |     def build(cls, remotes, **kwargs):
 14 |         parsed = urlparse.urlparse(remotes)
 15 |         if parsed.scheme != 'vnc':
 16 |             raise error.Error('HardcodedAddresses must be initialized with a string starting with vnc://: {}'.format(remotes))
 17 | 
 18 |         addresses = parsed.netloc.split(',')
 19 |         query = urlparse.parse_qs(parsed.query)
 20 |         # We could support per-backend passwords, but no need for it
 21 |         # right now.
 22 |         password = query.get('password', [utils.default_password()])[0]
 23 |         vnc_addresses, rewarder_addresses = parse_remotes(addresses)
 24 |         res = cls(vnc_addresses, rewarder_addresses, vnc_password=password, rewarder_password=password, **kwargs)
 25 |         return res, res.available_n
 26 | 
 27 |     def __init__(self, vnc_addresses, rewarder_addresses, vnc_password, rewarder_password, start_timeout=None):
 28 |         if vnc_addresses is not None:
 29 |             self.available_n = len(vnc_addresses)
 30 |         elif rewarder_addresses is not None:
 31 |             self.available_n = len(rewarder_addresses)
 32 |         else:
 33 |             assert False
 34 | 
 35 |         self.supports_reconnect = False
 36 |         self.connect_vnc = vnc_addresses is not None
 37 |         self.connect_rewarder = rewarder_addresses is not None
 38 |         if rewarder_addresses is None:
 39 |             logger.info("No rewarder addresses were provided, so this env cannot connect to the remote's rewarder channel, and cannot send control messages (e.g. reset)")
 40 | 
 41 |         self.vnc_addresses = vnc_addresses
 42 |         self.vnc_password = vnc_password
 43 |         self.rewarder_addresses = rewarder_addresses
 44 |         self.rewarder_password = rewarder_password
 45 |         if start_timeout is None:
 46 |             start_timeout = 2 * self.available_n + 5
 47 |         self.start_timeout = start_timeout
 48 | 
 49 |         self._popped = False
 50 | 
 51 |     def pop(self, n=None):
 52 |         if self._popped:
 53 |             assert n is None
 54 |             return []
 55 |         self._popped = True
 56 | 
 57 |         remotes = []
 58 |         for i in range(self.available_n):
 59 |             if self.vnc_addresses is not None:
 60 |                 vnc_address = self.vnc_addresses[i]
 61 |             else:
 62 |                 vnc_address = None
 63 | 
 64 |             if self.rewarder_addresses is not None:
 65 |                 rewarder_address = self.rewarder_addresses[i]
 66 |             else:
 67 |                 rewarder_address = None
 68 | 
 69 |             name = self._handles[i]
 70 |             env = remote.Remote(
 71 |                 handle=self._handles[i],
 72 |                 vnc_address=vnc_address,
 73 |                 vnc_password=self.vnc_password,
 74 |                 rewarder_address=rewarder_address,
 75 |                 rewarder_password=self.rewarder_password,
 76 |             )
 77 |             remotes.append(env)
 78 |         return remotes
 79 | 
 80 |     def allocate(self, handles, initial=False, params={}):
 81 |         if len(handles) > self.available_n:
 82 |             raise error.Error('Requested {} handles, but only have {} envs'.format(len(handles), self.available_n))
 83 |         self.n = len(handles)
 84 |         self._handles = handles
 85 | 
 86 |     def close(self):
 87 |         pass
 88 | 
 89 | def parse_remotes(remotes):
 90 |     # Parse a list of remotes of the form:
 91 |     #
 92 |     # address:vnc_port+rewarder_port (e.g. localhost:5900+15900)
 93 |     #
 94 |     # either vnc_port or rewarder_port can be omitted, but not both
 95 | 
 96 |     all_vnc = None
 97 |     all_rewarder = None
 98 | 
 99 |     vnc_addresses = []
100 |     rewarder_addresses = []
101 | 
102 |     for remote in remotes:
103 |         # Parse off +, then :
104 |         if '+' in remote:
105 |             if all_vnc == False:
106 |                 raise error.Error('Either all or no remotes must have rewarders: {}'.format(remotes))
107 |             all_vnc = True
108 | 
109 |             remote, rewarder_port = remote.split('+')
110 |             if not re.match(r'^[0-9]+$', rewarder_port):
111 |                 raise error.Error('Rewarder port must be an integer, not `{}`: {}'.format(rewarder_port, remotes))
112 |             rewarder_port = int(rewarder_port)
113 |         else:
114 |             if all_vnc == True:
115 |                 raise error.Error('Either all or no remotes must have rewarders: {}'.format(remotes))
116 |             all_vnc = False
117 | 
118 |             rewarder_port = None
119 | 
120 |         if ':' in remote:
121 |             if all_rewarder == False:
122 |                 raise error.Error('Either all or no remotes must have a VNC port: {}'.format(remotes))
123 |             all_rewarder = True
124 | 
125 |             remote, vnc_port = remote.split(':')
126 |             if not re.match(r'^[0-9]+$', vnc_port):
127 |                 raise error.Error('VNC port must be an integer, not `{}`: {}'.format(vnc_port, remotes))
128 |             vnc_port = int(vnc_port)
129 |         else:
130 |             if all_rewarder == True:
131 |                 raise error.Error('Either all or no remotes must have a VNC port: {}'.format(remotes))
132 |             all_rewarder = False
133 | 
134 |             vnc_port = None
135 |             all_rewarder = False
136 | 
137 |         host = remote
138 |         if not re.match(r'^[-a-zA-Z0-9\.\_]+$', host):
139 |             raise error.Error('Invalid hostname for remote: {}'.format(remotes))
140 | 
141 |         if rewarder_port is not None:
142 |             rewarder_address = '{}:{}'.format(host, rewarder_port)
143 |             rewarder_addresses.append(rewarder_address)
144 | 
145 |         if vnc_port is not None:
146 |             vnc_address = '{}:{}'.format(host, vnc_port)
147 |             vnc_addresses.append(vnc_address)
148 | 
149 |     if not all_vnc and not all_rewarder:
150 |         raise error.Error('You must provide either rewarder or a VNC port: {}'.format(remotes))
151 | 
152 |     if not vnc_addresses:
153 |         vnc_addresses = None
154 |     if not rewarder_addresses:
155 |         rewarder_addresses = None
156 |     return vnc_addresses, rewarder_addresses
157 | 


--------------------------------------------------------------------------------
/universe/wrappers/gym_core.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import gym
  3 | import time
  4 | import numpy as np
  5 | from universe import error
  6 | from gym import spaces as gym_spaces
  7 | from universe import spaces
  8 | from universe import rewarder, vectorized
  9 | from universe.envs.vnc_core_env import translator
 10 | 
 11 | logger = logging.getLogger(__name__)
 12 | 
# Height and width of the region that CropAtari cuts out of each frame.
ATARI_HEIGHT = 210
ATARI_WIDTH = 160
 15 | 
 16 | def atari_vnc(up=False, down=False, left=False, right=False, z=False):
 17 |     return [spaces.KeyEvent.by_name('up', down=up),
 18 |             spaces.KeyEvent.by_name('left', down=left),
 19 |             spaces.KeyEvent.by_name('right', down=right),
 20 |             spaces.KeyEvent.by_name('down', down=down),
 21 |             spaces.KeyEvent.by_name('z', down=z)]
 22 | 
 23 | def gym_core_action_space(gym_core_id):
 24 |     spec = gym.spec(gym_core_id)
 25 | 
 26 |     if spec.id == 'CartPole-v0':
 27 |         return spaces.Hardcoded([[spaces.KeyEvent.by_name('left', down=True)],
 28 |                                  [spaces.KeyEvent.by_name('left', down=False)]])
 29 |     elif spec._entry_point.startswith('gym.envs.atari:'):
 30 |         actions = []
 31 |         env = spec.make()
 32 |         for action in env.unwrapped.get_action_meanings():
 33 |             z = 'FIRE' in action
 34 |             left = 'LEFT' in action
 35 |             right = 'RIGHT' in action
 36 |             up = 'UP' in action
 37 |             down = 'DOWN' in action
 38 |             translated = atari_vnc(up=up, down=down, left=left, right=right, z=z)
 39 |             actions.append(translated)
 40 |         return spaces.Hardcoded(actions)
 41 |     else:
 42 |         raise error.Error('Unsupported env type: {}'.format(spec.id))
 43 | 
 44 | 
 45 | class CropAtari(vectorized.ObservationWrapper):
 46 |     """
 47 | Crop the relevant portion of the monitor where an Atari enviroment resides.
 48 | """
 49 | 
 50 |     def __init__(self, env):
 51 |         super(CropAtari, self).__init__(env)
 52 |         self.observation_space = gym_spaces.Box(0, 255, shape=(ATARI_HEIGHT, ATARI_WIDTH, 3))
 53 | 
 54 |     def _observation(self, observation_n):
 55 |         return [{'vision': ob['vision'][:ATARI_HEIGHT, :ATARI_WIDTH, :]} for ob in observation_n]
 56 | 
 57 | def one_hot(indices, depth):
 58 |     return np.eye(depth)[indices]
 59 | 
 60 | class GymCoreAction(vectorized.ActionWrapper):
 61 |     def __init__(self, env, gym_core_id=None):
 62 |         super(GymCoreAction, self).__init__(env)
 63 | 
 64 |         if gym_core_id is None:
 65 |             # self.spec is None while inside of the make, so we need
 66 |             # to pass gym_core_id in explicitly there. This case will
 67 |             # be hit when instantiating by hand.
 68 |             gym_core_id = self.spec._kwargs['gym_core_id']
 69 | 
 70 |         spec = gym.spec(gym_core_id)
 71 |         raw_action_space = gym_core_action_space(gym_core_id)
 72 | 
 73 |         self._actions = raw_action_space.actions
 74 |         self.action_space = gym_spaces.Discrete(len(self._actions))
 75 | 
 76 |         if spec._entry_point.startswith('gym.envs.atari:'):
 77 |             self.key_state = translator.AtariKeyState(gym.make(gym_core_id))
 78 |         else:
 79 |             self.key_state = None
 80 | 
 81 |     def _action(self, action_n):
 82 |         # Each action might be a length-1 np.array. Cast to int to
 83 |         # avoid warnings.
 84 |         return [self._actions[int(action)] for action in action_n]
 85 | 
 86 |     def _reverse_action(self, action_n):
 87 |         # Only works for core envs currently
 88 |         self.key_state.apply_vnc_actions(action_n)
 89 |         return one_hot(self.key_state.to_index(), self.action_space.n)
 90 | 
 91 | class GymCoreObservation(vectorized.Wrapper):
 92 |     def __init__(self, env, gym_core_id=None):
 93 |         super(GymCoreObservation, self).__init__(env)
 94 | 
 95 |         if gym_core_id is None:
 96 |             # self.spec is None while inside of the make, so we need
 97 |             # to pass gym_core_id in explicitly there. This case will
 98 |             # be hit when instantiating by hand.
 99 |             gym_core_id = self.spec._kwargs['gym_core_id']
100 | 
101 |         self._reward_n = None
102 |         self._done_n = None
103 |         self._info_n = None
104 | 
105 |         self._gym_core_env = gym.spec(gym_core_id).make()
106 | 
107 |     def _reset(self):
108 |         observation_n = self.env.reset()
109 |         self.reward_n = [0] * self.n
110 |         self.done_n = [False] * self.n
111 |         self.info = {'n': [{} for _ in range(self.n)]}
112 |         new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for i in range(self.n)])
113 |         rewarder.merge_n(
114 |             observation_n, self.reward_n, self.done_n, self.info,
115 |             new_observation_n, new_reward_n, new_done_n, new_info
116 |         )
117 |         return self._observation(self.done_n, self.info)
118 | 
119 |     def _step(self, action_n):
120 |         observation_n, reward_n, done_n, info = self.env.step(action_n)
121 |         if self.reward_n is not None:
122 |             rewarder.merge_n(
123 |                 observation_n, reward_n, done_n, info,
124 |                 [None] * self.n, self.reward_n, self.done_n, self.info,
125 |             )
126 |             self.reward_n = self.done_n = self.info = None
127 |         return self._observation(done_n, info), reward_n, done_n, info
128 | 
129 |     def _observation(self, done_n, info):
130 |         missing = set()
131 | 
132 |         observation_n = [None] * self.n
133 |         for i, (done, info_i) in enumerate(zip(done_n, info['n'])):
134 |             rewarder_observation = info_i.pop('rewarder.observation', None)
135 |             if rewarder_observation is not None:
136 |                 observation, episode_id = rewarder_observation
137 |                 observation_n[i] = self._gym_core_env.observation_space.from_jsonable(observation)
138 | 
139 |                 if done:
140 |                     # Check whether we should mask
141 |                     completed = info_i['env_status.completed_episode_id']
142 |                     # Observation from old!
143 |                     if episode_id == completed:
144 |                         logger.debug('[%d] Masking rewarder_observation on episode boundary', i)
145 |                         observation_n[i] = None
146 |             else:
147 |                 missing.add(i)
148 | 
149 |         if len(missing) > 0:
150 |             logger.debug('Missing rewarder observations: %s', missing)
151 |         return observation_n
152 | 


--------------------------------------------------------------------------------
/universe/wrappers/throttle.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import time
  3 | from universe import pyprofile, rewarder, spaces, vectorized
  4 | 
  5 | logger = logging.getLogger(__name__)
  6 | 
  7 | class Throttle(vectorized.Wrapper):
  8 |     """
    An env wrapper that submits the action ASAP.
 10 | 
 11 |     Previous implementation would sleep first and then call env._step.
 12 |     This implementation calls env._step twice:
 13 |         1. first call submits given action
 14 |         2. after sleeping based on fps, second call submits empty action to
 15 |            receive observation.
 16 | 
 17 |     visual observation from first call is discarded.
 18 |     metadata and rewards from the two calls are merged.
 19 |     text observations are merged as well.
 20 |     """
    def __init__(self, env):
        """Wrap ``env``; no step has been counted yet."""
        super(Throttle, self).__init__(env)

        # None means the step counter has not been initialized; _step()
        # checks for this before incrementing it.
        self._steps = None
 25 | 
 26 |     def configure(self, skip_metadata=False, fps='default', **kwargs):
 27 |         if fps == 'default':
 28 |             fps = self.metadata['video.frames_per_second']
 29 |         self.fps = fps
 30 |         self.skip_metadata = skip_metadata
 31 | 
 32 |         self.env.configure(**kwargs)
 33 |         self.diagnostics = self.unwrapped.diagnostics
 34 | 
 35 |     def _reset(self):
 36 |         # We avoid aggregating reward/info across episode boundaries
 37 |         # by caching it on the object
 38 |         self._deferred_reward_n = None
 39 |         self._deferred_done_n = None
 40 |         self._deferred_info_n = None
 41 | 
 42 |         observation = self.env.reset()
 43 |         self._start_timer()
 44 |         return observation
 45 | 
 46 |     def _step(self, action_n):
 47 |         if self._steps is None:
 48 |             self._start_timer()
 49 |         self._steps += 1
 50 | 
 51 |         accum_observation_n, accum_reward_n, accum_done_n, accum_info = self._substep(action_n)
 52 |         accum_info['throttle.action.available_at'] = time.time()
 53 | 
 54 |         # Record which indexes we were just peeking at, so when we
 55 |         # make the follow-up we'll be sure to peek there too.
 56 |         peek_n = [any(spaces.PeekReward for peek in action) for action in action_n]
 57 | 
 58 |         if self.fps is None:
 59 |             return accum_observation_n, accum_reward_n, accum_done_n, accum_info
 60 | 
 61 |         accum_info['stats.throttle.sleep'] = 0
 62 |         while True:
 63 |             # See how much time we have to idle
 64 |             delta = self._start + 1./self.fps * self._steps - time.time()
 65 | 
 66 |             # The following assumes that our control loop
 67 |             if delta < 0:
 68 |                 # We're out of time. Just get out of here.
 69 |                 delta = abs(delta)
 70 |                 if delta >= 1:
 71 |                     logger.info('Throttle fell behind by %.2fs; lost %.2f frames', delta, self.fps*delta)
 72 |                 pyprofile.timing('vnc_env.Throttle.lost_sleep', delta)
 73 |                 self._start_timer()
 74 |                 break
 75 |             # elif delta < 0.008:
 76 |             #     # Only have 8ms. Let's spend it sleeping, and
 77 |             #     # return an image which may have up to an
 78 |             #     # additional 8ms lag.
 79 |             #     #
 80 |             #     # 8ms is reasonably arbitrary; we just want something
 81 |             #     # that's small where it's not actually going to help
 82 |             #     # if we make another step call. Step with 32 parallel
 83 |             #     # envs takes about 6ms (about half of which is
 84 |             #     # diagnostics, which could be totally async!), so 8 is
 85 |             #     # a reasonable choice for now..
 86 |             #     pyprofile.timing('vnc_env.Throttle.sleep', delta)
 87 |             #     accum_info['stats.throttle.sleep'] += delta
 88 |             #     time.sleep(delta)
 89 |             #     break
 90 |             else:
 91 |                 # We've got plenty of time. Sleep for up to 16ms, and
 92 |                 # then refresh our current frame. We need to
 93 |                 # constantly be calling step so that our lags are
 94 |                 # reported correctly, within 16ms. (The layering is
 95 |                 # such that the vncdriver doesn't know which pixels
 96 |                 # correspond to metadata, and the diagnostics don't
 97 |                 # know when pixels first got painted. So we do our
 98 |                 # best to present frames as they're ready to the
 99 |                 # diagnostics.)
100 |                 delta = min(delta, 0.016)
101 |                 pyprofile.timing('vnc_env.Throttle.sleep', delta)
102 |                 accum_info['stats.throttle.sleep'] += delta
103 |                 time.sleep(delta)
104 | 
105 |                 # We want to merge in the latest reward/done/info so that our
106 |                 # agent has the most up-to-date info post-sleep, but also want
107 |                 # to avoid popping any rewards where done=True (since we'd
108 |                 # have to merge across episode boundaries).
109 |                 action_n = []
110 |                 for done, peek in zip(accum_done_n, peek_n):
111 |                     if done or peek:
112 |                         # No popping of reward/done
113 |                         action_n.append([spaces.PeekReward])
114 |                     else:
115 |                         action_n.append([])
116 | 
117 |                 observation_n, reward_n, done_n, info = self._substep(action_n)
118 | 
119 |                 # Merge observation, rewards and metadata.
120 |                 # Text observation has order in which the messages are sent.
121 |                 rewarder.merge_n(
122 |                     accum_observation_n, accum_reward_n, accum_done_n, accum_info,
123 |                     observation_n, reward_n, done_n, info,
124 |                 )
125 | 
126 |         return accum_observation_n, accum_reward_n, accum_done_n, accum_info
127 | 
128 |     def _substep(self, action_n):
129 |         with pyprofile.push('vnc_env.Throttle.step'):
130 |             start = time.time()
131 |             # Submit the action ASAP, before the thread goes to sleep.
132 |             observation_n, reward_n, done_n, info = self.env.step(action_n)
133 | 
134 |             available_at = info['throttle.observation.available_at'] = time.time()
135 |             if available_at - start > 1:
136 |                 logger.info('env.step took a long time: %.2fs', available_at - start)
137 |             if not self.skip_metadata and self.diagnostics is not None:
138 |                 # Run (slow) diagnostics
139 |                 self.diagnostics.add_metadata(observation_n, info['n'], available_at=available_at)
140 |             return observation_n, reward_n, done_n, info
141 | 
142 |     def _start_timer(self):
143 |         self._start = time.time()
144 |         self._steps = 0
145 | 


--------------------------------------------------------------------------------
/universe/remotes/healthcheck.py:
--------------------------------------------------------------------------------
  1 | import errno
  2 | import logging
  3 | import select
  4 | import socket
  5 | import time
  6 | 
  7 | from universe import error, utils
  8 | from gym.utils import reraise
  9 | 
 10 | logger = logging.getLogger(__name__)
 11 | 
 12 | def run(vnc_addresses, rewarder_addresses, timeout=None, start_timeout=None):
 13 |     healthcheck = Healthcheck(vnc_addresses, rewarder_addresses, timeout=timeout, start_timeout=start_timeout)
 14 |     healthcheck.run()
 15 | 
 16 | def host_port(address, default_port=None):
 17 |     split = address.split(':')
 18 |     if len(split) == 1:
 19 |         host = split[0]
 20 |         port = default_port
 21 |     else:
 22 |         host, port = split
 23 |         port = int(port)
 24 |     return host, port
 25 | 
 26 | class Healthcheck(object):
 27 |     def __init__(self, vnc_addresses, rewarder_addresses, timeout=None, start_timeout=None):
 28 |         self.timeout = timeout or (4 * len(vnc_addresses) + 20)
 29 |         self.start_timeout = start_timeout
 30 | 
 31 |         start_time = time.time()
 32 | 
 33 |         self.sockets = {}
 34 |         for address in vnc_addresses:
 35 |             self._register_vnc(address, start_time)
 36 |         for address in rewarder_addresses:
 37 |             self._register_rewarder(address, start_time)
 38 | 
 39 |     def _register_vnc(self, address, start_time=None):
 40 |         if start_time is None:
 41 |             start_time = time.time()
 42 | 
 43 |         host, port = host_port(address, default_port=5900)
 44 | 
 45 |         while True:
 46 |             # In VNC, the server sends bytes upon connection
 47 |             sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 48 |             try:
 49 |                 sock.connect((host, port))
 50 |             except (socket.error, socket.gaierror) as e:
 51 |                 # ECONNREFUSED: VNC env hasn't come up yet
 52 |                 # ETIMEDOUT: the packets can't be delivered yet, such as can happen on kubernetes
 53 |                 # gaierror: can't resolve the address yet, which can also happen on kubernetes
 54 |                 expected = socket.errno.ECONNREFUSED == e.errno or socket.errno.ETIMEDOUT == e.errno or isinstance(e, socket.gaierror)
 55 |                 if self.start_timeout is None or not expected:
 56 |                     reraise(suffix='while connecting to VNC server {}'.format(address))
 57 |                 logger.info('VNC server %s did not come up yet (error: %s). Sleeping for 1s.', address, e)
 58 |                 time.sleep(1)
 59 |             else:
 60 |                 break
 61 | 
 62 |             if time.time() - start_time > self.start_timeout:
 63 |                 raise error.Error('VNC server {} did not come up within {}s'.format(address, self.start_timeout))
 64 | 
 65 |         self.sockets[sock] = ('vnc', address)
 66 | 
 67 |     def _register_rewarder(self, address, start_time=None):
 68 |         if start_time is None:
 69 |             start_time = time.time()
 70 | 
 71 |         host, port = host_port(address, default_port=15900)
 72 | 
 73 |         while True:
 74 |             # In WebSockets, the server sends bytes once we've upgraded the protocol
 75 |             sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 76 |             try:
 77 |                 sock.connect((host, port))
 78 |             except (socket.error, socket.gaierror) as e:
 79 |                 # ECONNREFUSED: VNC env hasn't come up yet
 80 |                 # ETIMEDOUT: the packets can't be delivered yet, such as can happen on kubernetes
 81 |                 # gaierror: can't resolve the address yet, which can also happen on kubernetes
 82 |                 expected = socket.errno.ECONNREFUSED == e.errno or socket.errno.ETIMEDOUT == e.errno or isinstance(e, socket.gaierror)
 83 |                 if self.start_timeout is None or not expected:
 84 |                     reraise(suffix='while connecting to Rewarder server {}'.format(address))
 85 |                 logger.info('Rewarder server %s did not come up yet (error: %s). Sleeping for 1s.', address, e)
 86 |                 time.sleep(1)
 87 |             else:
 88 |                 break
 89 | 
 90 |             if time.time() - start_time > self.start_timeout:
 91 |                 raise error.Error('Rewarder server {} did not come up within {}s'.format(address, self.start_timeout))
 92 | 
 93 |         # Send a websocket handshake.
 94 |         # https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API/Writing_WebSocket_servers
 95 |         #
 96 |         # The port 10003 is an arbitrary port that we don't actually connect to, but needs to be a valid part
 97 |         # e.g Host: 127.0.0.1:GARBAGE results in the following error: (invalid port 'GARBAGE' in HTTP Host header '127.0.0.1:GARBAGE')
 98 |         sock.send(b'GET / HTTP/1.1\r\nHost: 127.0.0.1:10003\r\nUpgrade: WebSocket\r\nConnection:Upgrade\r\nSec-WebSocket-Key: dGhlIHNhbXBsZSBub25jZQ==\r\nSec-WebSocket-Version: 13\r\nauthorization: ' + utils.basic_auth_encode('openai').encode('utf-8') + b'\r\nopenai-observer: true\r\n\r\n')
 99 |         self.sockets[sock] = ('rewarder', address)
100 | 
101 |     def run(self):
102 |         target = time.time() + self.timeout
103 |         while self.sockets:
104 |             remaining = target - time.time()
105 |             if remaining < 0:
106 |                 break
107 |             ready, _, _ = select.select(self.sockets.keys(), [], [], remaining)
108 | 
109 |             # Go through the readable sockets
110 |             remote_closed = False
111 |             for sock in ready:
112 |                 type, address = self.sockets.pop(sock)
113 | 
114 |                 # Connection was closed; try again.
115 |                 #
116 |                 # This is guaranteed not to block.
117 |                 try:
118 |                     recv = sock.recv(1)
119 |                 except socket.error as e:
120 |                     if e.errno == errno.ECONNRESET:
121 |                         recv = b''
122 |                     else:
123 |                         raise
124 | 
125 |                 if recv == b'':
126 |                     logger.info('Remote closed: address=%s', address)
127 |                     remote_closed = True
128 |                     if type == 'rewarder':
129 |                         self._register_rewarder(address)
130 |                     else:
131 |                         self._register_vnc(address)
132 |                 else:
133 |                     logger.debug('Healthcheck passed for %s %s', type, address)
134 | 
135 |                 sock.close()
136 | 
137 |             if remote_closed:
138 |                 sleep = 1
139 |                 logger.info('At least one sockets was closed by the remote. Sleeping %ds...', sleep)
140 |                 time.sleep(sleep)
141 | 
142 |         if self.sockets:
143 |             raise error.Error('Not all servers came up within {}s: {}'.format(self.timeout, list(self.sockets.values())))
144 | 


--------------------------------------------------------------------------------
/universe/wrappers/experimental/action_space.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | 
  3 | import gym
  4 | import numpy as np
  5 | from universe import spaces
  6 | from universe import vectorized
  7 | from universe.wrappers.gym_core import gym_core_action_space
  8 | 
  9 | logger = logging.getLogger(__name__)
 10 | 
 11 | def slither_vnc(space=False, left=False, right=False):
 12 |     return [spaces.KeyEvent.by_name('space', down=space),
 13 |             spaces.KeyEvent.by_name('left', down=left),
 14 |             spaces.KeyEvent.by_name('right', down=right)]
 15 | 
 16 | def racing_vnc(up=False, left=False, right=False):
 17 |     return [spaces.KeyEvent.by_name('up', down=up),
 18 |             spaces.KeyEvent.by_name('left', down=left),
 19 |             spaces.KeyEvent.by_name('right', down=right)]
 20 | 
 21 | def platform_vnc(up=False, left=False, right=False, space=False):
 22 |     return [spaces.KeyEvent.by_name('up', down=up),
 23 |             spaces.KeyEvent.by_name('left', down=left),
 24 |             spaces.KeyEvent.by_name('right', down=right),
 25 |             spaces.KeyEvent.by_name('space', down=space)]
 26 | 
 27 | 
 28 | class SafeActionSpace(vectorized.Wrapper):
 29 |     """
 30 |     Recall that every universe environment receives a list of VNC events as action.
 31 |     There exist many environments for which the set of relevant action is much smaller
 32 |     and is known.   For example, Atari environments have a modest number of keys,
 33 |     so this wrapper, when applied to an Atari environment will reduce its action space.
 34 |     Doing so is very convenient for research, since today's RL algorithms rely on random
 35 |     exploration, which is hurt by small action spaces.  As our algorithms get better
 36 |     and we switch to using the raw VNC commands, this wrapper will become less important.
 37 |     """
 38 |     def __init__(self, env):
 39 |         super(SafeActionSpace, self).__init__(env)
 40 | 
 41 |         if self.spec.tags.get('runtime') == 'gym-core':
 42 |             self.action_space = gym_core_action_space(self.spec._kwargs['gym_core_id'])
 43 |         elif self.spec is None:
 44 |             pass
 45 |         elif self.spec.id in ['internet.SlitherIO-v0',
 46 |                               'internet.SlitherIOErmiyaEskandaryBot-v0',
 47 |                               'internet.SlitherIOEasy-v0']:
 48 |             self.action_space = spaces.Hardcoded([slither_vnc(left=True),
 49 |                                                   slither_vnc(right=True),
 50 |                                                   slither_vnc(space=True),
 51 |                                                   slither_vnc(left=True, space=True),
 52 |                                                   slither_vnc(right=True, space=True)])
 53 |         elif self.spec.id in ['flashgames.DuskDrive-v0']:
 54 |             # TODO: be more systematic
 55 |             self.action_space = spaces.Hardcoded([racing_vnc(up=True),
 56 |                                                   racing_vnc(left=True),
 57 |                                                   racing_vnc(right=True)])
 58 |         elif self.spec.id in ['flashgames.RedBeard-v0']:
 59 |             self.action_space = spaces.Hardcoded([platform_vnc(up=True),
 60 |                                                   platform_vnc(left=True),
 61 |                                                   platform_vnc(right=True),
 62 |                                                   platform_vnc(space=True)])
 63 | 
 64 | 
 65 | class SoftmaxClickMouse(vectorized.ActionWrapper):
 66 |     """
 67 |     Creates a Discrete action space of mouse clicks.
 68 | 
 69 |     This wrapper divides the active region into cells and creates an action for
 70 |     each which clicks in the middle of the cell.
 71 |     """
 72 |     def __init__(self, env, active_region=(10, 75 + 50, 10 + 160, 75 + 210), discrete_mouse_step=10, noclick_regions=[]):
 73 |         super(SoftmaxClickMouse, self).__init__(env)
 74 |         logger.info('Using SoftmaxClickMouse with action_region={}, noclick_regions={}'.format(active_region, noclick_regions))
 75 |         xlow, ylow, xhigh, yhigh = active_region
 76 |         xs = range(xlow, xhigh, discrete_mouse_step)
 77 |         ys = range(ylow, yhigh, discrete_mouse_step)
 78 |         self.active_region = active_region
 79 |         self.discrete_mouse_step = discrete_mouse_step
 80 |         self.noclick_regions = noclick_regions
 81 |         self._points = []
 82 |         removed = 0
 83 |         for x in xs:
 84 |             for y in ys:
 85 |                 xc = min(x+int(discrete_mouse_step/2), xhigh-1) # click to center of a cell
 86 |                 yc = min(y+int(discrete_mouse_step/2), yhigh-1)
 87 |                 if any(self.is_contained((xc, yc), r) for r in noclick_regions):
 88 |                     removed += 1
 89 |                     continue
 90 |                 self._points.append((xc, yc))
 91 |         logger.info('SoftmaxClickMouse noclick regions removed {} of {} actions'.format(removed, removed + len(self._points)))
 92 |         self.action_space = gym.spaces.Discrete(len(self._points))
 93 | 
 94 |     def _action(self, action_n):
 95 |         return [self._discrete_to_action(int(i)) for i in action_n]
 96 | 
 97 |     def _discrete_to_action(self, i):
 98 |         xc, yc = self._points[i]
 99 |         return [
100 |             spaces.PointerEvent(xc, yc, buttonmask=0), # release
101 |             spaces.PointerEvent(xc, yc, buttonmask=1), # click
102 |             spaces.PointerEvent(xc, yc, buttonmask=0), # release
103 |         ]
104 | 
105 |     def _reverse_action(self, action):
106 |         xlow, ylow, xhigh, yhigh = self.active_region
107 |         try:
108 |             # find first valid mousedown, ignore everything else
109 |             click_event = next(e for e in action if isinstance(e, spaces.PointerEvent) and e.buttonmask == 1)
110 |             index = self._action_to_discrete(click_event)
111 |             if index is None:
112 |                 return np.zeros(len(self._points))
113 |             else:
114 |                 # return one-hot vector, expected by demo training code
115 |                 # FIXME(jgray): move one-hot translation to separate layer
116 |                 return np.eye(len(self._points))[index]
117 |         except StopIteration:
118 |             # no valid mousedowns
119 |             return np.zeros(len(self._points))
120 | 
121 |     def _action_to_discrete(self, event):
122 |         assert isinstance(event, spaces.PointerEvent)
123 |         x, y = event.x, event.y
124 |         step = self.discrete_mouse_step
125 |         xlow, ylow, xhigh, yhigh = self.active_region
126 |         xc = min((int((x - xlow) / step) * step) + xlow + step / 2, xhigh - 1)
127 |         yc = min((int((y - ylow) / step) * step) + ylow + step / 2, yhigh - 1)
128 |         try:
129 |             return self._points.index((xc, yc))
130 |         except ValueError:
131 |             # ignore clicks outside of active region or in noclick regions
132 |             return None
133 | 
134 |     @classmethod
135 |     def is_contained(cls, point, coords):
136 |         px, py = point
137 |         x, width, y, height = coords
138 |         return x <= px <= x + width and y <= py <= y + height
139 | 


--------------------------------------------------------------------------------
/universe/utils/__init__.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import six
  3 | import sys
  4 | if six.PY2:
  5 |     import Queue as queue
  6 | else:
  7 |     import queue
  8 | import threading
  9 | import signal
 10 | from twisted.internet import defer
 11 | 
 12 | from universe.twisty import reactor
 13 | 
 14 | logger = logging.getLogger(__name__)
 15 | 
 16 | class ErrorBuffer(object):
 17 |     def __init__(self):
 18 |         self.queue = queue.Queue()
 19 | 
 20 |     def __enter__(self):
 21 |         pass
 22 | 
 23 |     def __exit__(self, type, value, traceback):
 24 |         if value is not None:
 25 |             self.record(value)
 26 | 
 27 |     def __call__(self, error, wrap=True):
 28 |         self.record(error, wrap=True)
 29 | 
 30 |     def record(self, error, wrap=True):
 31 |         logger.debug('Error in thread %s: %s', threading.current_thread().name, error)
 32 |         if wrap:
 33 |             error = format_error(error)
 34 | 
 35 |         try:
 36 |             self.queue.put_nowait(error)
 37 |         except queue.Full:
 38 |             pass
 39 | 
 40 |     def check(self, timeout=None):
 41 |         if timeout is None:
 42 |             timeout = 0
 43 | 
 44 |         try:
 45 |             error = self.queue.get(timeout=timeout)
 46 |         except queue.Empty:
 47 |             return
 48 |         else:
 49 |             raise error
 50 | 
 51 |     def blocking_check(self, timeout=None):
 52 |         # TODO: get rid of this method
 53 |         if timeout is None:
 54 |             while True:
 55 |                 self.check(timeout=3600)
 56 |         else:
 57 |             self.check(timeout)
 58 | 
 59 | 
 60 | from twisted.python import failure
 61 | import traceback
 62 | import threading
 63 | from universe import error
 64 | def format_error(e):
 65 |     # errback automatically wraps everything in a Twisted Failure
 66 |     if isinstance(e, failure.Failure):
 67 |         e = e.value
 68 | 
 69 |     if isinstance(e, str):
 70 |         err_string = e
 71 |     elif six.PY2:
 72 |         err_string = traceback.format_exc(e).rstrip()
 73 |     else:
 74 |         err_string = ''.join(traceback.format_exception(type(e), e, e.__traceback__)).rstrip()
 75 | 
 76 |     if err_string == 'None':
 77 |         # Reasonable heuristic for exceptions that were created by hand
 78 |         last = traceback.format_stack()[-2]
 79 |         err_string = '{}\n  {}'.format(e, last)
 80 |     # Quick and dirty hack for now.
 81 |     err_string = err_string.replace('Connection to the other side was lost in a non-clean fashion', 'Connection to the other side was lost in a non-clean fashion (HINT: this generally actually means we got a connection refused error. Check that the remote is actually running.)')
 82 |     return error.Error(err_string)
 83 | 
 84 | def queue_get(local_queue):
 85 |     while True:
 86 |         try:
 87 |             result = local_queue.get(timeout=1000)
 88 |         except queue.Empty:
 89 |             pass
 90 |         else:
 91 |             return result
 92 | 
 93 | def blockingCallFromThread(f, *a, **kw):
 94 |     local_queue = queue.Queue()
 95 |     def _callFromThread():
 96 |         result = defer.maybeDeferred(f, *a, **kw)
 97 |         result.addBoth(local_queue.put)
 98 |     reactor.callFromThread(_callFromThread)
 99 |     result = queue_get(local_queue)
100 |     if isinstance(result, failure.Failure):
101 |         if result.frames:
102 |             e = error.Error(str(result))
103 |         else:
104 |             e = result.value
105 |         raise e
106 |     return result
107 | 
108 | from gym import spaces
def repeat_space(space, n):
    """Return a ``spaces.Tuple`` holding ``n`` references to the same ``space``."""
    copies = [space for _ in range(n)]
    return spaces.Tuple(copies)
111 | 
112 | import base64
113 | import uuid
def random_alphanumeric(length=14):
    """Return a random string of exactly ``length`` ASCII letters and digits.

    Characters come from the base64 encoding of random UUIDs, with the
    non-alphanumeric base64 symbols ('+', '/', '=') dropped.
    """
    chars = []
    while len(chars) < length:
        # b64encode instead of encodestring, which was removed in Python 3.9.
        entropy = base64.b64encode(uuid.uuid4().bytes).decode('ascii')
        chars.extend(c for c in entropy if c.isalnum())
    return ''.join(chars)[:length]
121 | 
122 | 
def best_effort(function, *args, **kwargs):
    """Call ``function(*args, **kwargs)``, logging instead of raising on failure.

    Returns the function's result, or None if it raised an ``Exception``.
    SystemExit and KeyboardInterrupt are deliberately not swallowed, so a
    shutdown requested through ``sys.exit`` (e.g. from a signal handler) still
    takes effect.
    """
    try:
        return function(*args, **kwargs)
    except Exception:
        # Not every callable has __name__ (e.g. functools.partial objects).
        name = getattr(function, '__name__', repr(function))
        logging.error('Error in %s:', name)
        if sys.version_info[0] == 2:
            traceback.print_exc()
        else:
            logger.error(traceback.format_exc())
        return None
134 | 
135 | import base64
def basic_auth_encode(username, password=''):
    """Return the HTTP Basic ``Authorization`` header value for the credentials.

    The result is ``'Basic '`` followed by the base64 encoding of
    ``'username:password'`` (UTF-8), on a single line.
    """
    credentials = '{}:{}'.format(username, password)
    # b64encode never inserts line breaks, unlike the removed
    # base64.encodestring, which wrapped long output with newlines.
    return 'Basic ' + base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
139 | 
def basic_auth_decode(header):
    """Parse an HTTP Basic ``Authorization`` header value.

    Returns ``(username, password)`` when ``header`` starts with ``'Basic '``,
    otherwise None. The credentials are split at the first colon only, so the
    password may itself contain colons. Raises ValueError if the decoded
    credentials contain no colon.
    """
    prefix = 'Basic '
    if not header.startswith(prefix):
        return None
    encoded = header[len(prefix):]
    # b64decode instead of decodestring, which was removed in Python 3.9.
    decoded = base64.b64decode(encoded.encode('utf-8')).decode('utf-8')
    username, password = decoded.split(':', 1)
    return username, password
148 | 
149 | import os
def default_password():
    """Return the stripped contents of the privileged password file, or 'openai' if it does not exist."""
    password_file = '/usr/local/openai/privileged_state/password'
    if not os.path.exists(password_file):
        return 'openai'
    with open(password_file) as f:
        return f.read().strip()
155 | 
156 | import logging
157 | import time
158 | logger = logging.getLogger(__name__)
class PeriodicLog(object):
    """Rate-limited logging.

    Each message is identified by ``name`` and throttled per ``obj``: the
    time of the last emission is kept in a dict stored on ``obj`` itself
    (``_periodic_log_info`` for ``log``, ``_periodic_log_debug`` for
    ``log_debug``).

    Both methods accept two optional keyword arguments:
        frequency: minimum number of seconds between emissions (default 1).
        delay: seconds to hold back the very first emission (default 0).
    """

    def _is_throttled(self, obj, state_attr, name, options):
        """Return True if ``name`` was logged for ``obj`` too recently; otherwise mark it as logged now."""
        try:
            history = getattr(obj, state_attr)
        except AttributeError:
            history = {}
            setattr(obj, state_attr, history)

        # Would be better to use a frequency=... arg after kwargs, but
        # that isn't py2 compatible.
        frequency = options.pop('frequency', 1)
        delay = options.pop('delay', 0)
        previous = history.setdefault(name, time.time()-frequency+delay)
        if time.time() - previous < frequency:
            return True
        history[name] = time.time()
        return False

    def log(self, obj, name, msg, *args, **kwargs):
        """Emit ``msg`` (with ``args``) at INFO level unless it is throttled."""
        if self._is_throttled(obj, '_periodic_log_info', name, kwargs):
            return
        logger.info('[{}] {}'.format(name, msg), *args)

    def log_debug(self, obj, name, msg, *args, **kwargs):
        """Emit ``msg`` (with ``args``) at DEBUG level unless it is throttled."""
        if self._is_throttled(obj, '_periodic_log_debug', name, kwargs):
            return
        logger.debug('[{}] {}'.format(name, msg), *args)
189 | 
# One module-level PeriodicLog instance; its bound methods are exported so
# callers can use periodic_log(...) / periodic_log_debug(...) as plain functions.
_periodic = PeriodicLog()
periodic_log = _periodic.log
periodic_log_debug = _periodic.log_debug
193 | 
194 | import threading
def thread_name():
    """Return the name of the thread this function is called from."""
    current = threading.current_thread()
    return current.name
197 | 
def exit_on_signal():
    """
    Install a signal handler for HUP, INT, and TERM to call exit, allowing clean shutdown.
    When running a universe environment, it's important to shut down the container when the
    agent dies so you should either call this or otherwise arrange to exit on signals.

    The handler exits with status ``128 + <signal number>``. Signals that the
    current platform does not define (e.g. SIGHUP on Windows) are skipped.
    """
    def shutdown(signum, frame):
        # Parameter is named signum so it does not shadow the signal module.
        logger.warning('Received signal %s: exiting', signum)
        sys.exit(128 + signum)

    for sig_name in ('SIGHUP', 'SIGINT', 'SIGTERM'):
        signum = getattr(signal, sig_name, None)
        if signum is not None:
            signal.signal(signum, shutdown)
210 | 


--------------------------------------------------------------------------------