├── .gitignore ├── LICENSE.md ├── README.md ├── __init__.py ├── pybulletgym ├── __init__.py ├── agents │ ├── __init__.py │ ├── agent_register.py │ ├── agents_baselines.py │ └── agents_kerasrl.py ├── envs │ ├── __init__.py │ ├── assets │ │ ├── __init__.py │ │ ├── mjcf │ │ │ ├── ant.xml │ │ │ ├── capsule.xml │ │ │ ├── capsule_fromtoX.xml │ │ │ ├── capsule_fromtoY.xml │ │ │ ├── capsule_fromtoZ.xml │ │ │ ├── cylinder.xml │ │ │ ├── cylinder_fromtoX.xml │ │ │ ├── cylinder_fromtoY.xml │ │ │ ├── cylinder_fromtoZ.xml │ │ │ ├── ground.xml │ │ │ ├── ground_plane.xml │ │ │ ├── half_cheetah.xml │ │ │ ├── hello_mjcf.xml │ │ │ ├── hopper.xml │ │ │ ├── humanoid.xml │ │ │ ├── humanoid_fixed.xml │ │ │ ├── humanoid_symmetric.xml │ │ │ ├── humanoid_symmetric_no_ground.xml │ │ │ ├── inverted_double_pendulum.xml │ │ │ ├── inverted_pendulum.xml │ │ │ ├── pusher.xml │ │ │ ├── reacher.xml │ │ │ ├── striker.xml │ │ │ ├── swimmer.xml │ │ │ ├── thrower.xml │ │ │ └── walker2d.xml │ │ ├── robots │ │ │ └── atlas │ │ │ │ └── atlas_description │ │ │ │ ├── atlas_v4_with_multisense.urdf │ │ │ │ ├── materials │ │ │ │ └── textures │ │ │ │ │ ├── drc_extremities_diffuse.jpg │ │ │ │ │ ├── extremities_diffuse_unplugged.jpg │ │ │ │ │ ├── right_leg_diffuse_unplugged.jpg │ │ │ │ │ └── torso_diffuse_unplugged.jpg │ │ │ │ ├── meshes_unplugged │ │ │ │ ├── l_foot.dae │ │ │ │ ├── l_lglut.dae │ │ │ │ ├── l_lleg.dae │ │ │ │ ├── l_talus.dae │ │ │ │ ├── l_uglut.dae │ │ │ │ ├── l_uleg.dae │ │ │ │ ├── ltorso.dae │ │ │ │ ├── mtorso.dae │ │ │ │ ├── pelvis.dae │ │ │ │ ├── r_clav.dae │ │ │ │ ├── r_foot.dae │ │ │ │ ├── r_lglut.dae │ │ │ │ ├── r_lleg.dae │ │ │ │ ├── r_scap.dae │ │ │ │ ├── r_talus.dae │ │ │ │ ├── r_uarm.dae │ │ │ │ ├── r_uglut.dae │ │ │ │ ├── r_uleg.dae │ │ │ │ └── utorso.dae │ │ │ │ ├── meshes_v3 │ │ │ │ ├── r_farm.dae │ │ │ │ ├── r_hand.dae │ │ │ │ └── r_larm.dae │ │ │ │ └── multisense_sl_description │ │ │ │ ├── materials │ │ │ │ └── textures │ │ │ │ │ └── drc_torso_head_diffuse.jpg │ │ │ │ └── meshes │ │ │ 
│ ├── head.dae │ │ │ │ └── head_camera.dae │ │ ├── scenes │ │ │ └── stadium │ │ │ │ ├── part0.obj │ │ │ │ ├── part1.obj │ │ │ │ ├── part2.obj │ │ │ │ ├── plane100.obj │ │ │ │ ├── plane100.urdf │ │ │ │ ├── plane_stadium.sdf │ │ │ │ ├── stadium.mtl │ │ │ │ ├── stadium.obj │ │ │ │ ├── stadium.sdf │ │ │ │ └── stadium_grass.jpg │ │ └── things │ │ │ ├── cube_small.urdf │ │ │ └── sphere_small.urdf │ ├── gym_utils.py │ ├── mujoco │ │ ├── __init__.py │ │ ├── envs │ │ │ ├── __init__.py │ │ │ ├── env_bases.py │ │ │ ├── locomotion │ │ │ │ ├── __init__.py │ │ │ │ ├── ant_env.py │ │ │ │ ├── half_cheetah_env.py │ │ │ │ ├── hopper_env.py │ │ │ │ ├── humanoid_env.py │ │ │ │ ├── walker2d_env.py │ │ │ │ └── walker_base_env.py │ │ │ ├── manipulation │ │ │ │ ├── __init__.py │ │ │ │ ├── pusher_env.py │ │ │ │ ├── reacher_env.py │ │ │ │ ├── striker_env.py │ │ │ │ └── thrower_env.py │ │ │ └── pendulum │ │ │ │ ├── __init__.py │ │ │ │ ├── inverted_double_pendulum_env.py │ │ │ │ └── inverted_pendulum_env.py │ │ ├── robots │ │ │ ├── __init__.py │ │ │ ├── locomotors │ │ │ │ ├── __init__.py │ │ │ │ ├── ant.py │ │ │ │ ├── half_cheetah.py │ │ │ │ ├── hopper.py │ │ │ │ ├── humanoid.py │ │ │ │ ├── walker2d.py │ │ │ │ └── walker_base.py │ │ │ ├── manipulators │ │ │ │ ├── __init__.py │ │ │ │ ├── pusher.py │ │ │ │ ├── reacher.py │ │ │ │ ├── striker.py │ │ │ │ └── thrower.py │ │ │ ├── pendula │ │ │ │ ├── __init__.py │ │ │ │ ├── inverted_double_pendulum.py │ │ │ │ └── inverted_pendulum.py │ │ │ └── robot_bases.py │ │ └── scenes │ │ │ ├── __init__.py │ │ │ ├── scene_bases.py │ │ │ └── stadium.py │ └── roboschool │ │ ├── __init__.py │ │ ├── envs │ │ ├── __init__.py │ │ ├── env_bases.py │ │ ├── locomotion │ │ │ ├── __init__.py │ │ │ ├── ant_env.py │ │ │ ├── atlas_env.py │ │ │ ├── half_cheetah_env.py │ │ │ ├── hopper_env.py │ │ │ ├── humanoid_env.py │ │ │ ├── humanoid_flagrun_env.py │ │ │ ├── walker2d_env.py │ │ │ └── walker_base_env.py │ │ ├── manipulation │ │ │ ├── __init__.py │ │ │ ├── pusher_env.py │ │ │ 
├── reacher_env.py │ │ │ ├── striker_env.py │ │ │ └── thrower_env.py │ │ └── pendulum │ │ │ ├── __init__.py │ │ │ ├── inverted_double_pendulum_env.py │ │ │ └── inverted_pendulum_env.py │ │ ├── robots │ │ ├── __init__.py │ │ ├── locomotors │ │ │ ├── __init__.py │ │ │ ├── ant.py │ │ │ ├── atlas.py │ │ │ ├── half_cheetah.py │ │ │ ├── hopper.py │ │ │ ├── humanoid.py │ │ │ ├── humanoid_flagrun.py │ │ │ ├── walker2d.py │ │ │ └── walker_base.py │ │ ├── manipulators │ │ │ ├── __init__.py │ │ │ ├── pusher.py │ │ │ ├── reacher.py │ │ │ ├── striker.py │ │ │ └── thrower.py │ │ ├── pendula │ │ │ ├── __init__.py │ │ │ ├── interted_pendulum.py │ │ │ └── inverted_double_pendulum.py │ │ └── robot_bases.py │ │ └── scenes │ │ ├── __init__.py │ │ ├── scene_bases.py │ │ └── stadium.py ├── examples │ ├── __init__.py │ ├── roboschool-weights │ │ ├── __init__.py │ │ ├── enjoy_TF_AntPyBulletEnv_v0_2017may.py │ │ ├── enjoy_TF_AtlasPyBulletEnv_v0_2017jul.py │ │ ├── enjoy_TF_HalfCheetahPyBulletEnv_v0_2017may.py │ │ ├── enjoy_TF_HopperPyBulletEnv_v0_2017may.py │ │ ├── enjoy_TF_HumanoidFlagrunHarderPyBulletEnv_v0_2017may.py │ │ ├── enjoy_TF_HumanoidFlagrunHarderPyBulletEnv_v1_2017jul.py │ │ ├── enjoy_TF_HumanoidFlagrunPyBulletEnv_v0_2017may.py │ │ ├── enjoy_TF_HumanoidPyBulletEnv_v0_2017may.py │ │ ├── enjoy_TF_InvertedDoublePendulumPyBulletEnv_v0_2017may.py │ │ ├── enjoy_TF_InvertedPendulumPyBulletEnv_v0_2017may.py │ │ ├── enjoy_TF_InvertedPendulumSwingupPyBulletEnv_v0_2017may.py │ │ ├── enjoy_TF_ReacherPyBulletEnv_v0_2017may.py │ │ └── enjoy_TF_Walker2DPyBulletEnv_v0_2017may.py │ └── tensorforce │ │ ├── __init__.py │ │ ├── checkpoints │ │ ├── mujoco-invdpendulumv2 │ │ │ ├── checkpoint │ │ │ ├── ppo-mujoco-invdpendulumv2-58011633.data-00000-of-00001 │ │ │ ├── ppo-mujoco-invdpendulumv2-58011633.index │ │ │ ├── ppo-mujoco-invdpendulumv2-58011633.meta │ │ │ ├── ppo-mujoco-invdpendulumv2-58059089.data-00000-of-00001 │ │ │ ├── ppo-mujoco-invdpendulumv2-58059089.index │ │ │ ├── 
ppo-mujoco-invdpendulumv2-58059089.meta │ │ │ ├── ppo-mujoco-invdpendulumv2-58112398.data-00000-of-00001 │ │ │ ├── ppo-mujoco-invdpendulumv2-58112398.index │ │ │ ├── ppo-mujoco-invdpendulumv2-58112398.meta │ │ │ ├── ppo-mujoco-invdpendulumv2-58160617.data-00000-of-00001 │ │ │ ├── ppo-mujoco-invdpendulumv2-58160617.index │ │ │ ├── ppo-mujoco-invdpendulumv2-58160617.meta │ │ │ ├── ppo-mujoco-invdpendulumv2-58222240.data-00000-of-00001 │ │ │ ├── ppo-mujoco-invdpendulumv2-58222240.index │ │ │ └── ppo-mujoco-invdpendulumv2-58222240.meta │ │ └── mujoco-invpendulumv2 │ │ │ ├── checkpoint │ │ │ ├── ppo-mujoco-invpendulumv2-12475216.data-00000-of-00001 │ │ │ ├── ppo-mujoco-invpendulumv2-12475216.index │ │ │ ├── ppo-mujoco-invpendulumv2-12475216.meta │ │ │ ├── ppo-mujoco-invpendulumv2-12555996.data-00000-of-00001 │ │ │ ├── ppo-mujoco-invpendulumv2-12555996.index │ │ │ ├── ppo-mujoco-invpendulumv2-12555996.meta │ │ │ ├── ppo-mujoco-invpendulumv2-12635251.data-00000-of-00001 │ │ │ ├── ppo-mujoco-invpendulumv2-12635251.index │ │ │ ├── ppo-mujoco-invpendulumv2-12635251.meta │ │ │ ├── ppo-mujoco-invpendulumv2-12706882.data-00000-of-00001 │ │ │ ├── ppo-mujoco-invpendulumv2-12706882.index │ │ │ ├── ppo-mujoco-invpendulumv2-12706882.meta │ │ │ ├── ppo-mujoco-invpendulumv2-12789838.data-00000-of-00001 │ │ │ ├── ppo-mujoco-invpendulumv2-12789838.index │ │ │ └── ppo-mujoco-invpendulumv2-12789838.meta │ │ ├── configs │ │ ├── cnn_dqn2013_network.json │ │ ├── cnn_dqn_network.json │ │ ├── cnn_network_2048.json │ │ ├── ddpg.json │ │ ├── dqfd.json │ │ ├── dqn.json │ │ ├── dqn_ue4.json │ │ ├── dqn_visual.json │ │ ├── mlp2_embedding_network.json │ │ ├── mlp2_lstm_network.json │ │ ├── mlp2_network.json │ │ ├── mlp2_normalized_network.json │ │ ├── naf.json │ │ ├── ppo.json │ │ ├── ppo_cnn.json │ │ ├── trpo.json │ │ ├── vpg.json │ │ └── vpg_baseline_visual.json │ │ └── openai_gym.py ├── tests │ ├── __init__.py │ ├── roboschool │ │ ├── __init__.py │ │ └── agents │ │ │ ├── 
AntPyBulletEnv_v0_2017may.py │ │ │ ├── AtlasPyBulletEnv_v0_2017jul.py │ │ │ ├── HalfCheetahPyBulletEnv_v0_2017may.py │ │ │ ├── HopperPyBulletEnv_v0_2017may.py │ │ │ ├── HumanoidFlagrunHarderPyBulletEnv_v0_2017may.py │ │ │ ├── HumanoidFlagrunHarderPyBulletEnv_v1_2017jul.py │ │ │ ├── HumanoidFlagrunPyBulletEnv_v0_2017may.py │ │ │ ├── HumanoidPyBulletEnv_v0_2017may.py │ │ │ ├── InvertedDoublePendulumPyBulletEnv_v0_2017may.py │ │ │ ├── InvertedPendulumPyBulletEnv_v0_2017may.py │ │ │ ├── InvertedPendulumSwingupPyBulletEnv_v0_2017may.py │ │ │ ├── ReacherPyBulletEnv_v0_017may.py │ │ │ ├── Walker2DPyBulletEnv_v0_2017may.py │ │ │ ├── __init__.py │ │ │ └── policies.py │ ├── test_pybulletgym_mujoco_sanity.py │ ├── test_pybulletgym_roboschool_performance.py │ └── test_pybylletgym_roboschool_sanity.py └── utils │ ├── __init__.py │ ├── kerasrl_utils.py │ └── robot_dev_util.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # intellij idea 104 | .idea/ 105 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # pybullet-gym 2 | 3 | MIT License 4 | 5 | Copyright (c) 2018 Benjamin Ellenberger 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | 25 | # Mujoco models 26 | 27 | This work is derived from [MuJuCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license: 28 | 29 | ``` 30 | This file is part of MuJoCo. 31 | Copyright 2009-2015 Roboti LLC. 32 | Mujoco :: Advanced physics simulation engine 33 | Source : www.roboti.us 34 | Version : 1.31 35 | Released : 23Apr16 36 | Author :: Vikash Kumar 37 | Contacts : kumar@roboti.us 38 | ``` 39 | 40 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/__init__.py -------------------------------------------------------------------------------- /pybulletgym/__init__.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs import register # this is included to trigger env loading -------------------------------------------------------------------------------- /pybulletgym/agents/__init__.py: -------------------------------------------------------------------------------- 1 | # ---- register agents ---------- 2 | import agent_register 3 | 4 | # agent_register.register( 5 | # id='BaselinesDQNAgent-v0', 6 | # entry_point='agents_baselines:BaselinesDQNAgent' 7 | # ) 8 | 9 | agent_register.register( 10 | id='KerasCEMAgent-v0', 11 | 
entry_point='pybullet_envs.agents.agents_kerasrl:KerasCEMAgent' 12 | ) 13 | 14 | agent_register.register( 15 | id='KerasDDPGAgent-v0', 16 | entry_point='pybullet_envs.agents.agents_kerasrl:KerasDDPGAgent' 17 | ) 18 | 19 | agent_register.register( 20 | id='KerasDDQNAgent-v0', 21 | entry_point='pybullet_envs.agents.agents_kerasrl:KerasDDQNAgent' 22 | ) 23 | 24 | agent_register.register( 25 | id='KerasDQNAgent-v0', 26 | entry_point='pybullet_envs.agents.agents_kerasrl:KerasDQNAgent' 27 | ) 28 | 29 | agent_register.register( 30 | id='KerasNAFAgent-v0', 31 | entry_point='pybullet_envs.agents.agents_kerasrl:KerasNAFAgent' 32 | ) 33 | 34 | # from agents_baselines import BaselinesDQNAgent 35 | from agents_kerasrl import KerasCEMAgent, KerasDDPGAgent, KerasDDQNAgent, KerasDQNAgent, KerasNAFAgent 36 | -------------------------------------------------------------------------------- /pybulletgym/agents/agent_register.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pkg_resources 3 | import re 4 | from gym import error 5 | import warnings 6 | 7 | logger = logging.getLogger(__name__) 8 | # This format is true today, but it's *not* an official spec. 9 | # [username/](agent-name)-v(version) env-name is group 1, version is group 2 10 | # 11 | # 2017-08-26: We're experimentally expanding the agent ID format 12 | # to include an optional username. 13 | agent_id_re = re.compile(r'^(?:[\w:-]+\/)?([\w:.-]+)-v(\d+)$') 14 | 15 | def load(name): 16 | entry_point = pkg_resources.EntryPoint.parse('x={}'.format(name)) 17 | result = entry_point.load(False) 18 | return result 19 | 20 | class AgentSpec(object): 21 | """A specification for a particular instance of the agent. Used 22 | to register the parameters for official evaluations. 23 | Args: 24 | id (str): The official agent ID 25 | entry_point (Optional[str]): The Python entrypoint of the agent class (e.g. 
module.name:Class) 26 | kwargs (dict): The kwargs to pass to the agent class 27 | Attributes: 28 | id (str): The official agent ID 29 | """ 30 | 31 | def __init__(self, id, entry_point=None, kwargs=None): 32 | self.id = id 33 | 34 | # We may make some of these other parameters public if they're 35 | # useful. 36 | match = agent_id_re.search(id) 37 | if not match: 38 | raise error.Error('Attempted to register malformed agent ID: {}. (Currently all IDs must be of the form {}.)'.format(id, agent_id_re.pattern)) 39 | self._agent_name = match.group(1) 40 | self._entry_point = entry_point 41 | self._kwargs = {} if kwargs is None else kwargs 42 | 43 | def make(self, **kwargs): 44 | """Instantiates an instance of the agent with appropriate kwargs""" 45 | if self._entry_point is None: 46 | raise error.Error('Attempting to make deprecated agent {}. (HINT: is there a newer registered version of this agent?)'.format(self.id)) 47 | 48 | cls = load(self._entry_point) 49 | agent = cls(**kwargs) 50 | 51 | # Make the agent aware of which spec it came from. 52 | #agent.unwrapped._spec = self 53 | 54 | return agent 55 | 56 | def __repr__(self): 57 | return "AgentSpec({})".format(self.id) 58 | 59 | class AgentRegistry(object): 60 | """Register an agent by ID. 61 | """ 62 | 63 | def __init__(self): 64 | self.agent_specs = {} 65 | 66 | def make(self, id, **kwargs): 67 | logger.info('Making new agent: %s', id) 68 | spec = self.spec(id) 69 | agent = spec.make(**kwargs) 70 | 71 | return agent 72 | 73 | 74 | def all(self): 75 | return self.agent_specs.values() 76 | 77 | def spec(self, id): 78 | match = agent_id_re.search(id) 79 | if not match: 80 | raise error.Error('Attempted to look up malformed agent ID: {}. 
(Currently all IDs must be of the form {}.)'.format(id.encode('utf-8'), agent_id_re.pattern)) 81 | 82 | try: 83 | return self.agent_specs[id] 84 | except KeyError: 85 | # Parse the agent name and check to see if it matches the non-version 86 | # part of a valid agent (could also check the exact number here) 87 | agent_name = match.group(1) 88 | matching_agents = [valid_agent_name for valid_agent_name, valid_agent_spec in self.agent_specs.items() 89 | if agent_name == valid_agent_spec._agent_name] 90 | if matching_agents: 91 | raise error.DeprecatedEnv('Agent {} not found (valid versions include {})'.format(id, matching_agents)) 92 | else: 93 | raise error.UnregisteredEnv('No registered agent with id: {}'.format(id)) 94 | 95 | def register(self, id, **kwargs): 96 | if id in self.agent_specs: 97 | raise error.Error('Cannot re-register id: {}'.format(id)) 98 | self.agent_specs[id] = AgentSpec(id, **kwargs) 99 | 100 | # Have a global registry 101 | agent_registry = AgentRegistry() 102 | 103 | def register(id, **kwargs): 104 | return agent_registry.register(id, **kwargs) 105 | 106 | def make(id, **kwargs): 107 | return agent_registry.make(id, **kwargs) 108 | 109 | def spec(id): 110 | return agent_registry.spec(id) 111 | 112 | -------------------------------------------------------------------------------- /pybulletgym/agents/agents_baselines.py: -------------------------------------------------------------------------------- 1 | from baselines import deepq 2 | 3 | 4 | def add_opts(parser): 5 | pass 6 | 7 | 8 | class BaselinesDQNAgent(object): 9 | ''' 10 | classdocs 11 | ''' 12 | 13 | def __init__(self, opts): 14 | self.metadata = { 15 | 'discrete_actions': True, 16 | } 17 | 18 | self.opts = opts 19 | self.agent = None 20 | 21 | def configure(self, observation_space_shape, nb_actions): 22 | pass 23 | 24 | def train(self, env, nb_steps, visualize, verbosity): 25 | model = deepq.models.mlp([64]) 26 | self.agent = deepq.learn( 27 | env, 28 | q_func=model, 29 | lr=1e-3, 30 | 
max_timesteps=nb_steps, 31 | buffer_size=50000, 32 | exploration_fraction=0.1, 33 | exploration_final_eps=0.02, 34 | print_freq=10 if verbosity else None, 35 | callback=env.render if visualize else None 36 | ) 37 | 38 | def test(self, env, nb_episodes, visualize): 39 | episodes = 0 40 | while episodes < nb_episodes: 41 | obs, done = env.reset(), False 42 | episode_rew = 0 43 | while not done: 44 | if visualize: 45 | env.render() 46 | obs, rew, done, _ = env.step(self.agent(obs[None])[0]) 47 | episode_rew += rew 48 | print("Episode reward", episode_rew) 49 | episodes += 1 50 | 51 | def load_weights(self, load_file): 52 | self.agent = deepq.load(load_file) 53 | 54 | def save_weights(self, save_file, overwrite): 55 | self.agent.save(save_file) -------------------------------------------------------------------------------- /pybulletgym/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | # roboschool envs 4 | ## pendula 5 | register( 6 | id='InvertedPendulumPyBulletEnv-v0', 7 | entry_point='pybulletgym.envs.roboschool.envs.pendulum.inverted_pendulum_env:InvertedPendulumBulletEnv', 8 | max_episode_steps=1000, 9 | reward_threshold=950.0, 10 | ) 11 | 12 | register( 13 | id='InvertedDoublePendulumPyBulletEnv-v0', 14 | entry_point='pybulletgym.envs.roboschool.envs.pendulum.inverted_double_pendulum_env:InvertedDoublePendulumBulletEnv', 15 | max_episode_steps=1000, 16 | reward_threshold=9100.0, 17 | ) 18 | 19 | register( 20 | id='InvertedPendulumSwingupPyBulletEnv-v0', 21 | entry_point='pybulletgym.envs.roboschool.envs.pendulum.inverted_pendulum_env:InvertedPendulumSwingupBulletEnv', 22 | max_episode_steps=1000, 23 | reward_threshold=800.0, 24 | ) 25 | 26 | 27 | ## manipulators 28 | register( 29 | id='ReacherPyBulletEnv-v0', 30 | entry_point='pybulletgym.envs.roboschool.envs.manipulation.reacher_env:ReacherBulletEnv', 31 | max_episode_steps=150, 32 | reward_threshold=18.0, 33 
| ) 34 | 35 | register( 36 | id='PusherPyBulletEnv-v0', 37 | entry_point='pybulletgym.envs.roboschool.envs.manipulation.pusher_env:PusherBulletEnv', 38 | max_episode_steps=150, 39 | reward_threshold=18.0, 40 | ) 41 | 42 | register( 43 | id='ThrowerPyBulletEnv-v0', 44 | entry_point='pybulletgym.envs.roboschool.envs.manipulation.thrower_env:ThrowerBulletEnv', 45 | max_episode_steps=100, 46 | reward_threshold=18.0, 47 | ) 48 | 49 | register( 50 | id='StrikerPyBulletEnv-v0', 51 | entry_point='pybulletgym.envs.roboschool.envs.manipulation.striker_env:StrikerBulletEnv', 52 | max_episode_steps=100, 53 | reward_threshold=18.0, 54 | ) 55 | 56 | ## locomotors 57 | register( 58 | id='Walker2DPyBulletEnv-v0', 59 | entry_point='pybulletgym.envs.roboschool.envs.locomotion.walker2d_env:Walker2DBulletEnv', 60 | max_episode_steps=1000, 61 | reward_threshold=2500.0 62 | ) 63 | register( 64 | id='HalfCheetahPyBulletEnv-v0', 65 | entry_point='pybulletgym.envs.roboschool.envs.locomotion.half_cheetah_env:HalfCheetahBulletEnv', 66 | max_episode_steps=1000, 67 | reward_threshold=3000.0 68 | ) 69 | 70 | register( 71 | id='AntPyBulletEnv-v0', 72 | entry_point='pybulletgym.envs.roboschool.envs.locomotion.ant_env:AntBulletEnv', 73 | max_episode_steps=1000, 74 | reward_threshold=2500.0 75 | ) 76 | 77 | register( 78 | id='HopperPyBulletEnv-v0', 79 | entry_point='pybulletgym.envs.roboschool.envs.locomotion.hopper_env:HopperBulletEnv', 80 | max_episode_steps=1000, 81 | reward_threshold=2500.0 82 | ) 83 | 84 | register( 85 | id='HumanoidPyBulletEnv-v0', 86 | entry_point='pybulletgym.envs.roboschool.envs.locomotion.humanoid_env:HumanoidBulletEnv', 87 | max_episode_steps=1000 88 | ) 89 | 90 | register( 91 | id='HumanoidFlagrunPyBulletEnv-v0', 92 | entry_point='pybulletgym.envs.roboschool.envs.locomotion.humanoid_flagrun_env:HumanoidFlagrunBulletEnv', 93 | max_episode_steps=1000, 94 | reward_threshold=2000.0 95 | ) 96 | 97 | register( 98 | id='HumanoidFlagrunHarderPyBulletEnv-v0', 99 | 
entry_point='pybulletgym.envs.roboschool.envs.locomotion.humanoid_flagrun_env:HumanoidFlagrunHarderBulletEnv', 100 | max_episode_steps=1000 101 | ) 102 | 103 | register( 104 | id='AtlasPyBulletEnv-v0', 105 | entry_point='pybulletgym.envs.roboschool.envs.locomotion.atlas_env:AtlasBulletEnv', 106 | max_episode_steps=1000 107 | ) 108 | 109 | # mujoco envs 110 | register( 111 | id='InvertedPendulumMuJoCoEnv-v0', 112 | entry_point='pybulletgym.envs.mujoco.envs.pendulum.inverted_pendulum_env:InvertedPendulumMuJoCoEnv', 113 | max_episode_steps=1000, 114 | reward_threshold=950.0, 115 | ) 116 | 117 | register( 118 | id='InvertedDoublePendulumMuJoCoEnv-v0', 119 | entry_point='pybulletgym.envs.mujoco.envs.pendulum.inverted_double_pendulum_env:InvertedDoublePendulumMuJoCoEnv', 120 | max_episode_steps=1000, 121 | reward_threshold=9100.0, 122 | ) 123 | 124 | register( 125 | id='Walker2DMuJoCoEnv-v0', 126 | entry_point='pybulletgym.envs.mujoco.envs.locomotion.walker2d_env:Walker2DMuJoCoEnv', 127 | max_episode_steps=1000, 128 | reward_threshold=2500.0 129 | ) 130 | register( 131 | id='HalfCheetahMuJoCoEnv-v0', 132 | entry_point='pybulletgym.envs.mujoco.envs.locomotion.half_cheetah_env:HalfCheetahMuJoCoEnv', 133 | max_episode_steps=1000, 134 | reward_threshold=3000.0 135 | ) 136 | 137 | register( 138 | id='AntMuJoCoEnv-v0', 139 | entry_point='pybulletgym.envs.mujoco.envs.locomotion.ant_env:AntMuJoCoEnv', 140 | max_episode_steps=1000, 141 | reward_threshold=2500.0 142 | ) 143 | 144 | register( 145 | id='HopperMuJoCoEnv-v0', 146 | entry_point='pybulletgym.envs.mujoco.envs.locomotion.hopper_env:HopperMuJoCoEnv', 147 | max_episode_steps=1000, 148 | reward_threshold=2500.0 149 | ) 150 | 151 | register( 152 | id='HumanoidMuJoCoEnv-v0', 153 | entry_point='pybulletgym.envs.mujoco.envs.locomotion.humanoid_env:HumanoidMuJoCoEnv', 154 | max_episode_steps=1000 155 | ) 156 | 157 | 158 | def get_list(): 159 | envs = ['- ' + spec.id for spec in gym.pgym.envs.registry.all() if 
spec.id.find('Bullet') >= 0 or spec.id.find('MuJoCo') >= 0] 160 | return envs 161 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/ant.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 72 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/capsule.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/capsule_fromtoX.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/capsule_fromtoY.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/capsule_fromtoZ.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/cylinder.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/cylinder_fromtoX.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/cylinder_fromtoY.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/cylinder_fromtoZ.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/ground.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/ground_plane.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/half_cheetah.xml: -------------------------------------------------------------------------------- 1 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 89 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/hello_mjcf.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 
| 11 | 12 | 13 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/hopper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 40 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/inverted_double_pendulum.xml: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 48 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/inverted_pendulum.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 28 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/reacher.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 40 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/swimmer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 39 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/mjcf/walker2d.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 53 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/drc_extremities_diffuse.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/drc_extremities_diffuse.jpg 
-------------------------------------------------------------------------------- /pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/extremities_diffuse_unplugged.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/extremities_diffuse_unplugged.jpg -------------------------------------------------------------------------------- /pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/right_leg_diffuse_unplugged.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/right_leg_diffuse_unplugged.jpg -------------------------------------------------------------------------------- /pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/torso_diffuse_unplugged.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/torso_diffuse_unplugged.jpg -------------------------------------------------------------------------------- /pybulletgym/envs/assets/robots/atlas/atlas_description/multisense_sl_description/materials/textures/drc_torso_head_diffuse.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/robots/atlas/atlas_description/multisense_sl_description/materials/textures/drc_torso_head_diffuse.jpg 
-------------------------------------------------------------------------------- /pybulletgym/envs/assets/scenes/stadium/plane100.obj: -------------------------------------------------------------------------------- 1 | # Blender v2.66 (sub 1) OBJ File: '' 2 | # www.blender.org 3 | mtllib plane.mtl 4 | o Plane 5 | v 100.000000 -100.000000 0.000000 6 | v 100.000000 100.000000 0.000000 7 | v -100.000000 100.000000 0.000000 8 | v -100.000000 -100.000000 0.000000 9 | 10 | vt 100.000000 0.000000 11 | vt 100.000000 100.000000 12 | vt 0.000000 100.000000 13 | vt 0.000000 0.000000 14 | 15 | 16 | 17 | usemtl Material 18 | s off 19 | f 1/1 2/2 3/3 20 | f 1/1 3/3 4/4 21 | 22 | 23 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/scenes/stadium/plane100.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/scenes/stadium/plane_stadium.sdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 0 0 -9.8 4 | 5 | 1 6 | 0 0 0 0 0 0 7 | 8 | 9 | 0 10 | 11 | 0.166667 12 | 0 13 | 0 14 | 0.166667 15 | 0 16 | 0.166667 17 | 18 | 19 | 20 | 21 | 22 | 0 0 1 23 | 100 100 24 | 25 | 26 | 27 | 28 | 29 | 30 | 1 1 1 31 | plane100.obj 32 | 33 | 34 | 35 | 1 1 1 1 36 | 1 1 1 1 37 | .5 .5 .5 1 38 | 0 0 0 0 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/scenes/stadium/stadium.mtl: -------------------------------------------------------------------------------- 1 | newmtl stadium_white 2 | Ns 96.078431 3 | Ka 0.000000 0.000000 0.000000 4 | Kd 1.000000 1.000000 1.000000 5 | Ks 0.500000 0.500000 0.500000 6 | 7 | newmtl stadium_grass 8 | Ka 0.000000 0.000000 
0.000000 9 | Kd 0.000000 0.500000 0.000000 10 | Ks 0.000000 0.000000 0.000000 11 | map_Kd stadium_grass.jpg 12 | 13 | newmtl stadium_dirt 14 | Ka 0.000000 0.000000 0.000000 15 | Kd 0.600000 0.400000 0.000000 16 | Ks 0.000000 0.000000 0.000000 -------------------------------------------------------------------------------- /pybulletgym/envs/assets/scenes/stadium/stadium.sdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 0 0 -9.8 4 | 5 | 1 6 | 0 0 0 0 0 0 7 | 8 | 9 | 0 10 | 11 | 0.166667 12 | 0 13 | 0 14 | 0.166667 15 | 0 16 | 0.166667 17 | 18 | 19 | 20 | 21 | 22 | 1 1 1 23 | part0.obj 24 | 25 | 26 | 27 | 1 1 1 1 28 | 1.00000 1.00000 1.000000 1 29 | 0.1 .1 .1 1 30 | 0 0 0 0 31 | 32 | 33 | 34 | 35 | 36 | 1 37 | 0 0 0 0 0 0 38 | 39 | 40 | 0 41 | 42 | 0.166667 43 | 0 44 | 0 45 | 0.166667 46 | 0 47 | 0.166667 48 | 49 | 50 | 51 | 52 | 53 | 0 0 1 54 | 100 100 55 | 56 | 57 | 58 | 59 | 60 | 61 | 1 1 1 62 | part1.obj 63 | 64 | 65 | 66 | 1 1 1 1 67 | 0.600000 0.400000 0.000000 1 68 | .5 .5 .5 1 69 | 0 0 0 0 70 | 71 | 72 | 73 | 74 | 75 | 1 76 | 0 0 0 0 0 0 77 | 78 | 79 | 0 80 | 81 | 0.166667 82 | 0 83 | 0 84 | 0.166667 85 | 0 86 | 0.166667 87 | 88 | 89 | 90 | 91 | 92 | 93 | 1 1 1 94 | part2.obj 95 | 96 | 97 | 98 | 1 0 0 1 99 | 0.000000 0.500000 0.000000 1 100 | 0.4 0.4 0.4 1 101 | 0 0 0 0 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/scenes/stadium/stadium_grass.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/scenes/stadium/stadium_grass.jpg -------------------------------------------------------------------------------- /pybulletgym/envs/assets/things/cube_small.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /pybulletgym/envs/assets/things/sphere_small.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /pybulletgym/envs/gym_utils.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | 4 | from pybulletgym.envs.roboschool.robots.robot_bases import BodyPart 5 | 6 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 7 | parentdir = os.path.dirname(currentdir) 8 | os.sys.path.insert(0, parentdir) 9 | import pybullet_data 10 | 11 | 12 | def get_cube(p, x, y, z): 13 | body = p.loadURDF(os.path.join(pybullet_data.getDataPath(), "cube_small.urdf"), [x, y, z]) 14 | p.changeDynamics(body, -1, mass=1.2) # match Roboschool 15 | part_name, _ = p.getBodyInfo(body) 16 | part_name = part_name.decode("utf8") 17 | bodies = [body] 18 | return BodyPart(p, part_name, bodies, 0, -1) 19 | 20 | 21 | def get_sphere(p, x, y, z): 22 | body = p.loadURDF(os.path.join(pybullet_data.getDataPath(), "sphere2red_nocol.urdf"), [x, y, z]) 23 | part_name, _ = p.getBodyInfo(body) 24 | part_name = part_name.decode("utf8") 25 | bodies = [body] 26 | return BodyPart(p, part_name, bodies, 0, -1) 27 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/__init__.py 
-------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/envs/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/env_bases.py: -------------------------------------------------------------------------------- 1 | import gym, gym.spaces, gym.utils, gym.utils.seeding 2 | import numpy as np 3 | import pybullet 4 | from pybullet_utils import bullet_client 5 | 6 | from pkg_resources import parse_version 7 | 8 | 9 | class BaseBulletEnv(gym.Env): 10 | """ 11 | Base class for Bullet physics simulation environments in a Scene. 12 | These environments create single-player scenes and behave like normal Gym environments, if 13 | you don't use multiplayer. 14 | """ 15 | 16 | metadata = { 17 | 'render.modes': ['human', 'rgb_array'], 18 | 'video.frames_per_second': 60 19 | } 20 | 21 | def __init__(self, robot, render=False): 22 | self.scene = None 23 | self.physicsClientId = -1 24 | self.ownsPhysicsClient = 0 25 | self.camera = Camera() 26 | self.isRender = render 27 | self.robot = robot 28 | self._seed() 29 | self._cam_dist = 3 30 | self._cam_yaw = 0 31 | self._cam_pitch = -30 32 | self._render_width = 320 33 | self._render_height = 240 34 | 35 | self.action_space = robot.action_space 36 | self.observation_space = robot.observation_space 37 | 38 | def configure(self, args): 39 | self.robot.args = args 40 | 41 | def _seed(self, seed=None): 42 | self.np_random, seed = gym.utils.seeding.np_random(seed) 43 | self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env 44 | return [seed] 45 | 46 | def _reset(self): 47 | if self.physicsClientId < 0: 48 | self.ownsPhysicsClient = True 49 | 50 | if self.isRender: 51 | self._p = 
bullet_client.BulletClient(connection_mode=pybullet.GUI) 52 | else: 53 | self._p = bullet_client.BulletClient() 54 | 55 | self.physicsClientId = self._p._client 56 | self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI,0) 57 | 58 | if self.scene is None: 59 | self.scene = self.create_single_player_scene(self._p) 60 | if not self.scene.multiplayer and self.ownsPhysicsClient: 61 | self.scene.episode_restart(self._p) 62 | 63 | self.robot.scene = self.scene 64 | 65 | self.frame = 0 66 | self.done = 0 67 | self.reward = 0 68 | dump = 0 69 | s = self.robot.reset(self._p) 70 | self.potential = self.robot.calc_potential() 71 | return s 72 | 73 | def _render(self, mode, close=False): 74 | if mode == "human": 75 | self.isRender = True 76 | if mode != "rgb_array": 77 | return np.array([]) 78 | 79 | base_pos = [0, 0, 0] 80 | if hasattr(self,'robot'): 81 | if hasattr(self.robot,'body_xyz'): 82 | base_pos = self.robot.body_xyz 83 | 84 | view_matrix = self._p.computeViewMatrixFromYawPitchRoll( 85 | cameraTargetPosition=base_pos, 86 | distance=self._cam_dist, 87 | yaw=self._cam_yaw, 88 | pitch=self._cam_pitch, 89 | roll=0, 90 | upAxisIndex=2) 91 | proj_matrix = self._p.computeProjectionMatrixFOV( 92 | fov=60, aspect=float(self._render_width)/self._render_height, 93 | nearVal=0.1, farVal=100.0) 94 | (_, _, px, _, _) = self._p.getCameraImage( 95 | width = self._render_width, height=self._render_height, viewMatrix=view_matrix, 96 | projectionMatrix=proj_matrix, 97 | renderer=pybullet.ER_BULLET_HARDWARE_OPENGL 98 | ) 99 | rgb_array = np.array(px) 100 | rgb_array = rgb_array[:, :, :3] 101 | return rgb_array 102 | 103 | def _close(self): 104 | if self.ownsPhysicsClient: 105 | if self.physicsClientId >= 0: 106 | self._p.disconnect() 107 | self.physicsClientId = -1 108 | 109 | def HUD(self, state, a, done): 110 | pass 111 | 112 | # backwards compatibility for gym >= v0.9.x 113 | # for extension of this class. 
114 | def step(self, *args, **kwargs): 115 | return self._step(*args, **kwargs) 116 | 117 | if parse_version(gym.__version__)>=parse_version('0.9.6'): 118 | close = _close 119 | render = _render 120 | reset = _reset 121 | seed = _seed 122 | 123 | 124 | class Camera: 125 | def __init__(self): 126 | pass 127 | 128 | def move_and_look_at(self,i,j,k,x,y,z): 129 | lookat = [x,y,z] 130 | distance = 10 131 | yaw = 10 132 | self._p.resetDebugVisualizerCamera(distance, yaw, -20, lookat) 133 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/locomotion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/envs/locomotion/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/locomotion/ant_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.envs.locomotion.walker_base_env import WalkerBaseMuJoCoEnv 2 | from pybulletgym.envs.mujoco.robots.locomotors.ant import Ant 3 | 4 | 5 | class AntMuJoCoEnv(WalkerBaseMuJoCoEnv): 6 | def __init__(self): 7 | self.robot = Ant() 8 | WalkerBaseMuJoCoEnv.__init__(self, self.robot) 9 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/locomotion/half_cheetah_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.envs.locomotion.walker_base_env import WalkerBaseMuJoCoEnv 2 | from pybulletgym.envs.mujoco.robots.locomotors.half_cheetah import HalfCheetah 3 | import numpy as np 4 | 5 | 6 | class HalfCheetahMuJoCoEnv(WalkerBaseMuJoCoEnv): 7 | def __init__(self): 8 | self.robot = HalfCheetah() 9 | WalkerBaseMuJoCoEnv.__init__(self, self.robot) 10 | 11 | def step(self, a): 
12 | if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions 13 | self.robot.apply_action(a) 14 | self.scene.global_step() 15 | 16 | potential = self.robot.calc_potential() 17 | power_cost = -0.1 * np.square(a).sum() 18 | state = self.robot.calc_state() 19 | 20 | done = False 21 | 22 | debugmode = 0 23 | if debugmode: 24 | print("potential=") 25 | print(potential) 26 | print("power_cost=") 27 | print(power_cost) 28 | 29 | self.rewards = [ 30 | potential, 31 | power_cost 32 | ] 33 | if debugmode: 34 | print("rewards=") 35 | print(self.rewards) 36 | print("sum rewards") 37 | print(sum(self.rewards)) 38 | self.HUD(state, a, done) 39 | self.reward += sum(self.rewards) 40 | 41 | return state, sum(self.rewards), bool(done), {} 42 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/locomotion/hopper_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.envs.locomotion.walker_base_env import WalkerBaseMuJoCoEnv 2 | from pybulletgym.envs.mujoco.robots.locomotors.hopper import Hopper 3 | import numpy as np 4 | 5 | 6 | class HopperMuJoCoEnv(WalkerBaseMuJoCoEnv): 7 | def __init__(self): 8 | self.robot = Hopper() 9 | WalkerBaseMuJoCoEnv.__init__(self, self.robot) 10 | 11 | def step(self, a): 12 | if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions 13 | self.robot.apply_action(a) 14 | self.scene.global_step() 15 | 16 | alive_bonus = 1.0 17 | potential = self.robot.calc_potential() 18 | power_cost = -1e-3 * np.square(a).sum() 19 | state = self.robot.calc_state() 20 | 21 | height, ang = state[0], state[1] 22 | 23 | done = not (np.isfinite(state).all() and 24 | (np.abs(state[2:]) < 100).all() and 25 | (height > -0.3) and # height starts at 0 
in pybullet 26 | (abs(ang) < .2)) 27 | 28 | debugmode = 0 29 | if debugmode: 30 | print("potential=") 31 | print(potential) 32 | print("power_cost=") 33 | print(power_cost) 34 | 35 | self.rewards = [ 36 | potential, 37 | alive_bonus, 38 | power_cost 39 | ] 40 | if debugmode: 41 | print("rewards=") 42 | print(self.rewards) 43 | print("sum rewards") 44 | print(sum(self.rewards)) 45 | self.HUD(state, a, done) 46 | self.reward += sum(self.rewards) 47 | 48 | return state, sum(self.rewards), bool(done), {} 49 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/locomotion/humanoid_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.envs.locomotion.walker_base_env import WalkerBaseMuJoCoEnv 2 | from pybulletgym.envs.mujoco.robots.locomotors.humanoid import Humanoid 3 | 4 | 5 | class HumanoidMuJoCoEnv(WalkerBaseMuJoCoEnv): 6 | def __init__(self, robot=None): 7 | self.robot = robot if robot is not None else Humanoid() 8 | WalkerBaseMuJoCoEnv.__init__(self, self.robot) 9 | self.electricity_cost = 4.25 * WalkerBaseMuJoCoEnv.electricity_cost 10 | self.stall_torque_cost = 4.25 * WalkerBaseMuJoCoEnv.stall_torque_cost 11 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/locomotion/walker2d_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.envs.locomotion.walker_base_env import WalkerBaseMuJoCoEnv 2 | from pybulletgym.envs.mujoco.robots.locomotors.walker2d import Walker2D 3 | import numpy as np 4 | 5 | 6 | class Walker2DMuJoCoEnv(WalkerBaseMuJoCoEnv): 7 | def __init__(self): 8 | self.robot = Walker2D() 9 | WalkerBaseMuJoCoEnv.__init__(self, self.robot) 10 | 11 | def step(self, a): 12 | if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all 
robots with the same actions 13 | self.robot.apply_action(a) 14 | self.scene.global_step() 15 | 16 | alive_bonus = 1.0 17 | potential = self.robot.calc_potential() 18 | power_cost = -1e-3 * np.square(a).sum() 19 | state = self.robot.calc_state() 20 | 21 | height, ang = state[0], state[1] 22 | 23 | done = not (np.isfinite(state).all() and 24 | (np.abs(state[2:]) < 100).all() and 25 | (1.0 > height > -0.2) and # height starts at 0 in pybullet 26 | (-1.0 < ang < 1.0)) 27 | 28 | debugmode = 0 29 | if debugmode: 30 | print("potential=") 31 | print(potential) 32 | print("power_cost=") 33 | print(power_cost) 34 | 35 | self.rewards = [ 36 | potential, 37 | alive_bonus, 38 | power_cost 39 | ] 40 | if debugmode: 41 | print("rewards=") 42 | print(self.rewards) 43 | print("sum rewards") 44 | print(sum(self.rewards)) 45 | self.HUD(state, a, done) 46 | self.reward += sum(self.rewards) 47 | 48 | return state, sum(self.rewards), bool(done), {} 49 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/manipulation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/envs/manipulation/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/manipulation/pusher_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv 2 | from pybulletgym.envs.mujoco.robots.manipulators.pusher import Pusher 3 | from pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene 4 | 5 | 6 | class PusherBulletEnv(BaseBulletEnv): 7 | def __init__(self): 8 | self.robot = Pusher() 9 | BaseBulletEnv.__init__(self, self.robot) 10 | 11 | def create_single_player_scene(self, bullet_client): 12 | return 
SingleRobotEmptyScene(bullet_client, gravity=9.81, timestep=0.0020, frame_skip=5) 13 | 14 | def _step(self, a): 15 | self.robot.apply_action(a) 16 | self.scene.global_step() 17 | 18 | state = self.robot.calc_state() # sets self.to_target_vec 19 | 20 | potential_old = self.potential 21 | self.potential = self.robot.calc_potential() 22 | 23 | joint_vel = np.array([ 24 | self.robot.shoulder_pan_joint.get_velocity(), 25 | self.robot.shoulder_lift_joint.get_velocity(), 26 | self.robot.upper_arm_roll_joint.get_velocity(), 27 | self.robot.elbow_flex_joint.get_velocity(), 28 | self.robot.forearm_roll_joint.get_velocity(), 29 | self.robot.wrist_flex_joint.get_velocity(), 30 | self.robot.wrist_roll_joint.get_velocity() 31 | ]) 32 | 33 | action_product = np.matmul(np.abs(a), np.abs(joint_vel)) 34 | action_sum = np.sum(a) 35 | 36 | electricity_cost = ( 37 | -0.10 * action_product # work torque*angular_velocity 38 | - 0.01 * action_sum # stall torque require some energy 39 | ) 40 | 41 | stuck_joint_cost = 0 42 | for j in self.robot.ordered_joints: 43 | if np.abs(j.current_relative_position()[0]) - 1 < 0.01: 44 | stuck_joint_cost += -0.1 45 | 46 | self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost)] 47 | self.HUD(state, a, False) 48 | return state, sum(self.rewards), False, {} 49 | 50 | def calc_potential(self): 51 | return -100 * np.linalg.norm(self.to_target_vec) 52 | 53 | def camera_adjust(self): 54 | x, y, z = self.robot.fingertip.pose().xyz() 55 | x *= 0.5 56 | y *= 0.5 57 | self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z) 58 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/manipulation/reacher_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv 2 | from pybulletgym.envs.mujoco.robots.manipulators.reacher import Reacher 3 | from 
pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene 4 | import numpy as np 5 | 6 | 7 | class ReacherBulletEnv(BaseBulletEnv): 8 | def __init__(self): 9 | self.robot = Reacher() 10 | BaseBulletEnv.__init__(self, self.robot) 11 | 12 | def create_single_player_scene(self, bullet_client): 13 | return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0165, frame_skip=1) 14 | 15 | def _step(self, a): 16 | assert (not self.scene.multiplayer) 17 | self.robot.apply_action(a) 18 | self.scene.global_step() 19 | 20 | state = self.robot.calc_state() # sets self.to_target_vec 21 | 22 | potential_old = self.potential 23 | self.potential = self.robot.calc_potential() 24 | 25 | electricity_cost = ( 26 | -0.10 * (np.abs(a[0] * self.robot.theta_dot) + np.abs(a[1] * self.robot.gamma_dot)) # work torque*angular_velocity 27 | - 0.01 * (np.abs(a[0]) + np.abs(a[1])) # stall torque require some energy 28 | ) 29 | stuck_joint_cost = -0.1 if np.abs(np.abs(self.robot.gamma) - 1) < 0.01 else 0.0 30 | self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost)] 31 | self.HUD(state, a, False) 32 | return state, sum(self.rewards), False, {} 33 | 34 | def camera_adjust(self): 35 | x, y, z = self.robot.fingertip.pose().xyz() 36 | x *= 0.5 37 | y *= 0.5 38 | self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z) 39 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/manipulation/striker_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv 2 | from pybulletgym.envs.mujoco.robots.manipulators.striker import Striker 3 | from pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene 4 | import numpy as np 5 | 6 | 7 | class StrikerBulletEnv(BaseBulletEnv): 8 | def __init__(self): 9 | self.robot = Striker() 10 | BaseBulletEnv.__init__(self, self.robot) 11 | 
self._striked = False 12 | self._min_strike_dist = np.inf 13 | self.strike_threshold = 0.1 14 | 15 | def create_single_player_scene(self, bullet_client): 16 | return SingleRobotEmptyScene(bullet_client, gravity=9.81, timestep=0.0020, frame_skip=5) 17 | 18 | def _step(self, a): 19 | self.robot.apply_action(a) 20 | self.scene.global_step() 21 | 22 | state = self.robot.calc_state() # sets self.to_target_vec 23 | 24 | potential_old = self.potential 25 | self.potential = self.robot.calc_potential() 26 | 27 | joint_vel = np.array([ 28 | self.robot.shoulder_pan_joint.get_velocity(), 29 | self.robot.shoulder_lift_joint.get_velocity(), 30 | self.robot.upper_arm_roll_joint.get_velocity(), 31 | self.robot.elbow_flex_joint.get_velocity(), 32 | self.robot.upper_arm_roll_joint.get_velocity(), 33 | self.robot.wrist_flex_joint.get_velocity(), 34 | self.robot.wrist_roll_joint.get_velocity() 35 | ]) 36 | 37 | action_product = np.matmul(np.abs(a), np.abs(joint_vel)) 38 | action_sum = np.sum(a) 39 | 40 | electricity_cost = ( 41 | -0.10 * action_product # work torque*angular_velocity 42 | - 0.01 * action_sum # stall torque require some energy 43 | ) 44 | 45 | stuck_joint_cost = 0 46 | for j in self.robot.ordered_joints: 47 | if np.abs(j.current_relative_position()[0]) - 1 < 0.01: 48 | stuck_joint_cost += -0.1 49 | 50 | dist_object_finger = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz() 51 | reward_dist_vec = self.robot.object.pose().xyz() - self.robot.target.pose().xyz() # TODO: Should the object and target really belong to the robot? 
Maybe split this off 52 | 53 | self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(reward_dist_vec)) 54 | 55 | if np.linalg.norm(dist_object_finger) < self.strike_threshold: 56 | self._striked = True 57 | self._strike_pos = self.robot.fingertip.pose().xyz() 58 | 59 | if self._striked: 60 | reward_near_vec = self.robot.object.pose().xyz() - self._strike_pos 61 | else: 62 | reward_near_vec = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz() 63 | 64 | reward_near = - np.linalg.norm(reward_near_vec) 65 | 66 | reward_dist = - np.linalg.norm(self._min_strike_dist) 67 | reward_ctrl = - np.square(a).sum() 68 | self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost), 69 | 3 * reward_dist, 0.1 * reward_ctrl, 0.5 * reward_near] 70 | self.HUD(state, a, False) 71 | return state, sum(self.rewards), False, {} 72 | 73 | def calc_potential(self): 74 | return -100 * np.linalg.norm(self.to_target_vec) 75 | 76 | def camera_adjust(self): 77 | x, y, z = self.robot.fingertip.pose().xyz() 78 | x *= 0.5 79 | y *= 0.5 80 | self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z) 81 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/manipulation/thrower_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv 2 | from pybulletgym.envs.mujoco.robots.manipulators.thrower import Thrower 3 | from pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene 4 | import numpy as np 5 | 6 | 7 | class ThrowerBulletEnv(BaseBulletEnv): 8 | def __init__(self): 9 | self.robot = Thrower() 10 | BaseBulletEnv.__init__(self, self.robot) 11 | 12 | def create_single_player_scene(self, bullet_client): 13 | return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0020, frame_skip=5) 14 | 15 | def _step(self, a): 16 | self.robot.apply_action(a) 17 | 
self.scene.global_step() 18 | state = self.robot.calc_state() # sets self.to_target_vec 19 | 20 | potential_old = self.potential 21 | self.potential = self.robot.calc_potential() 22 | 23 | joint_vel = np.array([ 24 | self.robot.shoulder_pan_joint.get_velocity(), 25 | self.robot.shoulder_lift_joint.get_velocity(), 26 | self.robot.upper_arm_roll_joint.get_velocity(), 27 | self.robot.elbow_flex_joint.get_velocity(), 28 | self.robot.upper_arm_roll_joint.get_velocity(), 29 | self.robot.wrist_flex_joint.get_velocity(), 30 | self.robot.wrist_roll_joint.get_velocity() 31 | ]) 32 | 33 | action_product = np.matmul(np.abs(a), np.abs(joint_vel)) 34 | action_sum = np.sum(a) 35 | 36 | electricity_cost = ( 37 | -0.10 * action_product # work torque*angular_velocity 38 | - 0.01 * action_sum # stall torque require some energy 39 | ) 40 | 41 | stuck_joint_cost = 0 42 | for j in self.robot.ordered_joints: 43 | if np.abs(j.current_relative_position()[0]) - 1 < 0.01: 44 | stuck_joint_cost += -0.1 45 | 46 | object_xy = self.robot.object.pose().xyz()[:2] 47 | target_xy = self.robot.target.pose().xyz()[:2] 48 | 49 | if not self.robot._object_hit_ground and self.robot.object.pose().xyz()[2] < -0.25: # TODO: Should the object and target really belong to the robot? 
Maybe split this off 50 | self.robot._object_hit_ground = True 51 | self.robot._object_hit_location = self.robot.object.pose().xyz() 52 | 53 | if self.robot._object_hit_ground: 54 | object_hit_xy = self.robot._object_hit_location[:2] 55 | reward_dist = -np.linalg.norm(object_hit_xy - target_xy) 56 | else: 57 | reward_dist = -np.linalg.norm(object_xy - target_xy) 58 | reward_ctrl = - np.square(a).sum() 59 | 60 | self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost), 61 | reward_dist, 0.002 * reward_ctrl] 62 | self.HUD(state, a, False) 63 | return state, sum(self.rewards), False, {} 64 | 65 | def camera_adjust(self): 66 | x, y, z = self.robot.fingertip.pose().xyz() 67 | x *= 0.5 68 | y *= 0.5 69 | self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z) 70 | 71 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/pendulum/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/envs/pendulum/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/pendulum/inverted_double_pendulum_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv 2 | from pybulletgym.envs.mujoco.robots.pendula.inverted_double_pendulum import InvertedDoublePendulum 3 | from pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene 4 | 5 | 6 | class InvertedDoublePendulumMuJoCoEnv(BaseBulletEnv): 7 | def __init__(self): 8 | self.robot = InvertedDoublePendulum() 9 | BaseBulletEnv.__init__(self, self.robot) 10 | self.stateId = -1 11 | 12 | def create_single_player_scene(self, bullet_client): 13 | return SingleRobotEmptyScene(bullet_client, gravity=9.8, 
timestep=0.0165, frame_skip=1) 14 | 15 | def reset(self): 16 | if self.stateId >= 0: 17 | self._p.restoreState(self.stateId) 18 | r = BaseBulletEnv._reset(self) 19 | if self.stateId < 0: 20 | self.stateId = self._p.saveState() 21 | return r 22 | 23 | def step(self, a): 24 | self.robot.apply_action(a) 25 | self.scene.global_step() 26 | state = self.robot.calc_state() 27 | # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9 28 | # using tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3 29 | pos_x, _, pos_y = self.robot.pole2.pose().xyz() 30 | dist_penalty = 0.01 * pos_x ** 2 + (pos_y + 0.3 - 2) ** 2 31 | v1, v2 = self.robot.j1.current_position()[1], self.robot.j2.current_position()[1] 32 | vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 33 | alive_bonus = 10 34 | done = pos_y + 0.3 <= 1 35 | self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)] 36 | self.HUD(state, a, done) 37 | return state, sum(self.rewards), done, {} 38 | 39 | def camera_adjust(self): 40 | self.camera.move_and_look_at(0, 1.2, 1.2, 0, 0, 0.5) 41 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/envs/pendulum/inverted_pendulum_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv 2 | from pybulletgym.envs.mujoco.robots.pendula.inverted_pendulum import InvertedPendulum 3 | from pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene 4 | import numpy as np 5 | 6 | 7 | class InvertedPendulumMuJoCoEnv(BaseBulletEnv): 8 | def __init__(self): 9 | self.robot = InvertedPendulum() 10 | BaseBulletEnv.__init__(self, self.robot) 11 | self.stateId = -1 12 | 13 | def create_single_player_scene(self, bullet_client): 14 | return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1) 15 | 16 | def reset(self): 17 | if self.stateId >= 
0: 18 | # print("InvertedPendulumBulletEnv reset p.restoreState(",self.stateId,")") 19 | self._p.restoreState(self.stateId) 20 | r = BaseBulletEnv._reset(self) 21 | if self.stateId < 0: 22 | self.stateId = self._p.saveState() 23 | # print("InvertedPendulumBulletEnv reset self.stateId=",self.stateId) 24 | return r 25 | 26 | def step(self, a): 27 | self.robot.apply_action(a) 28 | self.scene.global_step() 29 | state = self.robot.calc_state() # sets self.pos_x self.pos_y 30 | vel_penalty = 0 31 | reward = 1.0 32 | done = not np.isfinite(state).all() or np.abs(state[1]) > .2 33 | self.rewards = [float(reward)] 34 | self.HUD(state, a, done) 35 | return state, sum(self.rewards), done, {} 36 | 37 | def camera_adjust(self): 38 | self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5) 39 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/robots/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/locomotors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/robots/locomotors/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/locomotors/ant.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.robots.locomotors.walker_base import WalkerBase 2 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot 3 | import numpy as np 4 | 5 | 6 | class Ant(WalkerBase, MJCFBasedRobot): 7 | foot_list = ['front_left_foot', 'front_right_foot', 
'left_back_foot', 'right_back_foot'] 8 | 9 | def __init__(self): 10 | WalkerBase.__init__(self, power=2.5) 11 | MJCFBasedRobot.__init__(self, "ant.xml", "torso", action_dim=8, obs_dim=111) 12 | 13 | def calc_state(self): 14 | WalkerBase.calc_state(self) 15 | pose = self.parts['torso'].get_pose() 16 | qpos = np.hstack((pose, [j.get_position() for j in self.ordered_joints])).flatten() # shape (15,) 17 | 18 | velocity = self.parts['torso'].get_velocity() 19 | qvel = np.hstack((velocity[0], velocity[1], [j.get_velocity() for j in self.ordered_joints])).flatten() # shape (14,) 20 | 21 | cfrc_ext = np.zeros((14, 6)) # shape (14, 6) # TODO: FIND cfrc_ext 22 | return np.concatenate([ 23 | qpos.flat[2:], # self.sim.data.qpos.flat[2:], 24 | qvel.flat, # self.sim.data.qvel.flat, 25 | np.clip(cfrc_ext, -1, 1).flat # np.clip(self.sim.data.cfrc_ext, -1, 1).flat, 26 | ]) 27 | 28 | def alive_bonus(self, z, pitch): 29 | return +1 if z > 0.26 else -1 # 0.25 is central sphere rad, die if it scrapes the ground 30 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/locomotors/half_cheetah.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.robots.locomotors.walker_base import WalkerBase 2 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot 3 | import numpy as np 4 | 5 | 6 | class HalfCheetah(WalkerBase, MJCFBasedRobot): 7 | """ 8 | Half Cheetah implementation based on MuJoCo. 
9 | """ 10 | foot_list = ["ffoot", "fshin", "fthigh", "bfoot", "bshin", "bthigh"] # track these contacts with ground 11 | 12 | def __init__(self): 13 | WalkerBase.__init__(self, power=1) 14 | MJCFBasedRobot.__init__(self, "half_cheetah.xml", "torso", action_dim=6, obs_dim=17, add_ignored_joints=True) 15 | 16 | self.pos_after = 0 17 | 18 | def calc_state(self): 19 | qpos = np.array([j.get_position() for j in self.ordered_joints], dtype=np.float32).flatten() # shape (9,) 20 | qvel = np.array([j.get_velocity() for j in self.ordered_joints], dtype=np.float32).flatten() # shape (9,) 21 | 22 | return np.concatenate([ 23 | qpos.flat[1:], # self.sim.data.qpos.flat[1:], 24 | qvel.flat # self.sim.data.qvel.flat, 25 | ]) 26 | 27 | def calc_potential(self): 28 | # progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second), 29 | # all rewards have rew/frame units and close to 1.0 30 | pos_before = self.pos_after 31 | self.pos_after = self.robot_body.get_pose()[0] 32 | debugmode = 0 33 | if debugmode: 34 | print("calc_potential: self.walk_target_dist") 35 | print(self.walk_target_dist) 36 | print("self.scene.dt") 37 | print(self.scene.dt) 38 | print("self.scene.frame_skip") 39 | print(self.scene.frame_skip) 40 | print("self.scene.timestep") 41 | print(self.scene.timestep) 42 | return (self.pos_after-pos_before) / self.scene.dt 43 | 44 | def robot_specific_reset(self, bullet_client): 45 | WalkerBase.robot_specific_reset(self, bullet_client) 46 | for part_id, part in self.parts.items(): 47 | self._p.changeDynamics(part.bodyIndex, part.bodyPartIndex, lateralFriction=0.8, spinningFriction=0.1, rollingFriction=0.1, restitution=0.5) 48 | 49 | self.jdict["bthigh"].power_coef = 120.0 50 | self.jdict["bshin"].power_coef = 90.0 51 | self.jdict["bfoot"].power_coef = 60.0 52 | self.jdict["fthigh"].power_coef = 140.0 53 | self.jdict["fshin"].power_coef = 60.0 54 | self.jdict["ffoot"].power_coef = 30.0 55 | 
-------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/locomotors/hopper.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.robots.locomotors.walker_base import WalkerBase 2 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot 3 | import numpy as np 4 | 5 | 6 | class Hopper(WalkerBase, MJCFBasedRobot): 7 | """ 8 | Hopper implementation based on MuJoCo. 9 | """ 10 | foot_list = ["foot"] 11 | 12 | def __init__(self): 13 | WalkerBase.__init__(self, power=0.75) 14 | MJCFBasedRobot.__init__(self, "hopper.xml", "torso", action_dim=3, obs_dim=11, add_ignored_joints=True) 15 | 16 | self.pos_after = 0 17 | 18 | def calc_state(self): 19 | qpos = np.array([j.get_position() for j in self.ordered_joints], dtype=np.float32).flatten() # shape (6,) 20 | qvel = np.array([j.get_velocity() for j in self.ordered_joints], dtype=np.float32).flatten() # shape (6,) 21 | 22 | return np.concatenate([ 23 | qpos.flat[1:], # self.sim.data.qpos.flat[1:], 24 | np.clip(qvel, -10, 10).flat # self.sim.data.qvel.flat, 25 | ]) 26 | 27 | def calc_potential(self): 28 | # progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second), 29 | # all rewards have rew/frame units and close to 1.0 30 | pos_before = self.pos_after 31 | self.pos_after = self.robot_body.get_pose()[0] 32 | debugmode = 0 33 | if debugmode: 34 | print("calc_potential: self.walk_target_dist") 35 | print(self.walk_target_dist) 36 | print("self.scene.dt") 37 | print(self.scene.dt) 38 | print("self.scene.frame_skip") 39 | print(self.scene.frame_skip) 40 | print("self.scene.timestep") 41 | print(self.scene.timestep) 42 | return (self.pos_after-pos_before) / self.scene.dt 43 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/locomotors/humanoid.py: 
-------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.robots.locomotors.walker_base import WalkerBase 2 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot 3 | import numpy as np 4 | 5 | 6 | class Humanoid(WalkerBase, MJCFBasedRobot): 7 | self_collision = True 8 | foot_list = ["right_foot", "left_foot"] # "left_hand", "right_hand" 9 | 10 | def __init__(self, random_yaw=False, random_lean=False): 11 | WalkerBase.__init__(self, power=0.41) 12 | MJCFBasedRobot.__init__(self, 'humanoid_symmetric.xml', 'torso', action_dim=17, obs_dim=376) 13 | # 17 joints, 4 of them important for walking (hip, knee), others may as well be turned off, 17/4 = 4.25 14 | self.random_yaw = random_yaw 15 | self.random_lean = random_lean 16 | 17 | def robot_specific_reset(self, bullet_client): 18 | WalkerBase.robot_specific_reset(self, bullet_client) 19 | self.motor_names = ["abdomen_z", "abdomen_y", "abdomen_x"] 20 | self.motor_power = [100, 100, 100] 21 | self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"] 22 | self.motor_power += [100, 100, 300, 200] 23 | self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"] 24 | self.motor_power += [100, 100, 300, 200] 25 | self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"] 26 | self.motor_power += [75, 75, 75] 27 | self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"] 28 | self.motor_power += [75, 75, 75] 29 | self.motors = [self.jdict[n] for n in self.motor_names] 30 | if self.random_yaw: 31 | position = [0, 0, 0] 32 | orientation = [0, 0, 0] 33 | yaw = self.np_random.uniform(low=-3.14, high=3.14) 34 | if self.random_lean and self.np_random.randint(2) == 0: 35 | if self.np_random.randint(2) == 0: 36 | pitch = np.pi/2 37 | position = [0, 0, 0.45] 38 | else: 39 | pitch = np.pi*3/2 40 | position = [0, 0, 0.25] 41 | roll = 0 42 | orientation = [roll, pitch, yaw] 43 | else: 44 | position = 
[0, 0, 1.4] 45 | orientation = [0, 0, yaw] # just face random direction, but stay straight otherwise 46 | self.robot_body.reset_position(position) 47 | self.robot_body.reset_orientation(p.getQuaternionFromEuler(orientation)) 48 | self.initial_z = 0.8 49 | 50 | def calc_state(self): 51 | WalkerBase.calc_state(self) 52 | 53 | pose = self.parts['torso'].get_pose() 54 | qpos = np.hstack((pose, [j.get_position() for j in self.ordered_joints])).flatten() # shape (24,) 55 | 56 | velocity = self.parts['torso'].get_velocity() 57 | qvel = np.hstack((velocity[0], velocity[1], [j.get_velocity() for j in self.ordered_joints])).flatten() # shape (23,) 58 | 59 | cinert = np.zeros((14, 10)) # shape (14, 10) # TODO: FIND 60 | cvel = np.zeros((14, 6)) # shape (14, 6) # TODO: FIND 61 | qfrc_actuator = np.zeros(23) # shape (23,) # TODO: FIND 62 | cfrc_ext = np.zeros((14, 6)) # shape (14, 6) # TODO: FIND cfrc_ext 63 | return np.concatenate([ 64 | qpos.flat[2:], # self.sim.data.qpos.flat[2:], 65 | qvel.flat, # self.sim.data.qvel.flat, 66 | cinert.flat, # data.cinert.flat, 67 | cvel.flat, # data.cvel.flat, 68 | qfrc_actuator.flat, # data.qfrc_actuator.flat, 69 | cfrc_ext.flat # data.cfrc_ext.flat 70 | ]) 71 | 72 | def apply_action(self, a): 73 | assert(np.isfinite(a).all()) 74 | force_gain = 1 75 | for i, m, power in zip(range(17), self.motors, self.motor_power): 76 | m.set_motor_torque(float(force_gain * power * self.power * np.clip(a[i], -1, +1))) 77 | 78 | def alive_bonus(self, z, pitch): 79 | return +2 if z > 0.78 else -1 # 2 here because 17 joints produce a lot of electricity cost just from policy noise, living must be better than dying 80 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/locomotors/walker2d.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.robots.locomotors.walker_base import WalkerBase 2 | from 
pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot 3 | import numpy as np 4 | 5 | 6 | class Walker2D(WalkerBase, MJCFBasedRobot): 7 | """ 8 | Walker2D implementation based on MuJoCo. 9 | """ 10 | foot_list = ["foot", "foot_left"] 11 | 12 | def __init__(self): 13 | WalkerBase.__init__(self, power=0.40) 14 | MJCFBasedRobot.__init__(self, "walker2d.xml", "torso", action_dim=6, obs_dim=17, add_ignored_joints=True) 15 | 16 | self.pos_after = 0 17 | 18 | def calc_state(self): 19 | qpos = np.array([j.get_position() for j in self.ordered_joints], dtype=np.float32).flatten() # shape (9,) 20 | qvel = np.array([j.get_velocity() for j in self.ordered_joints], dtype=np.float32).flatten() # shape (9,) 21 | 22 | return np.concatenate([ 23 | qpos[1:], # qpos[1:] 24 | np.clip(qvel, -10, 10) # np.clip(qvel, -10, 10) 25 | ]) 26 | 27 | def calc_potential(self): 28 | # progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second), 29 | # all rewards have rew/frame units and close to 1.0 30 | pos_before = self.pos_after 31 | self.pos_after = self.robot_body.get_pose()[0] 32 | debugmode = 0 33 | if debugmode: 34 | print("calc_potential: self.walk_target_dist") 35 | print(self.walk_target_dist) 36 | print("self.scene.dt") 37 | print(self.scene.dt) 38 | print("self.scene.frame_skip") 39 | print(self.scene.frame_skip) 40 | print("self.scene.timestep") 41 | print(self.scene.timestep) 42 | return (self.pos_after-pos_before) / self.scene.dt 43 | 44 | def robot_specific_reset(self, bullet_client): 45 | WalkerBase.robot_specific_reset(self, bullet_client) 46 | for n in ["foot_joint", "foot_left_joint"]: 47 | self.jdict[n].power_coef = 30.0 48 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/locomotors/walker_base.py: -------------------------------------------------------------------------------- 1 | from 
pybulletgym.envs.mujoco.robots.robot_bases import XmlBasedRobot 2 | import numpy as np 3 | 4 | 5 | class WalkerBase(XmlBasedRobot): 6 | def __init__(self, power): 7 | self.power = power 8 | self.camera_x = 0 9 | self.start_pos_x, self.start_pos_y, self.start_pos_z = 0, 0, 0 10 | self.walk_target_x = 1e3 # kilometers away 11 | self.walk_target_y = 0 12 | self.body_xyz = [0, 0, 0] 13 | 14 | def robot_specific_reset(self, bullet_client): 15 | self._p = bullet_client 16 | for j in self.ordered_joints: 17 | j.reset_current_position(self.np_random.uniform(low=-0.1, high=0.1), 0) 18 | 19 | self.feet = [self.parts[f] for f in self.foot_list] 20 | self.feet_contact = np.array([0.0 for f in self.foot_list], dtype=np.float32) 21 | self.scene.actor_introduce(self) 22 | self.initial_z = None 23 | 24 | def apply_action(self, a): 25 | assert (np.isfinite(a).all()) 26 | i = 0 27 | for n, j in enumerate(self.ordered_joints): 28 | if j.power_coef != 0: # in case the ignored joints are added, they have 0 power 29 | j.set_motor_torque(self.power * j.power_coef * float(np.clip(a[n-i], -1, +1))) 30 | else: 31 | i += 1 32 | 33 | def calc_state(self): 34 | j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten() 35 | # even elements [0::2] position, scaled to -1..+1 between limits 36 | # odd elements [1::2] angular speed, scaled to show -1..+1 37 | self.joint_speeds = j[1::2] 38 | self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99) 39 | 40 | body_pose = self.robot_body.pose() 41 | parts_xyz = np.array([p.pose().xyz() for p in self.parts.values()]).flatten() 42 | self.body_xyz = ( 43 | parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2]) # torso z is more informative than mean z 44 | self.body_rpy = body_pose.rpy() 45 | z = self.body_xyz[2] 46 | if self.initial_z is None: 47 | self.initial_z = z 48 | r, p, yaw = self.body_rpy 49 | self.walk_target_theta = np.arctan2(self.walk_target_y - self.body_xyz[1], 50 | 
self.walk_target_x - self.body_xyz[0]) 51 | self.walk_target_dist = np.linalg.norm( 52 | [self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0]]) 53 | angle_to_target = self.walk_target_theta - yaw 54 | 55 | rot_speed = np.array( 56 | [[np.cos(-yaw), -np.sin(-yaw), 0], 57 | [np.sin(-yaw), np.cos(-yaw), 0], 58 | [ 0, 0, 1]] 59 | ) 60 | vx, vy, vz = np.dot(rot_speed, self.robot_body.speed()) # rotate speed back to body point of view 61 | 62 | more = np.array([ z-self.initial_z, 63 | np.sin(angle_to_target), np.cos(angle_to_target), 64 | 0.3 * vx, 0.3 * vy, 0.3 * vz, # 0.3 is just scaling typical speed into -1..+1, no physical sense here 65 | r, p], dtype=np.float32) 66 | return np.clip( np.concatenate([more] + [j] + [self.feet_contact]), -5, +5) 67 | 68 | def calc_potential(self): 69 | # progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second), 70 | # all rewards have rew/frame units and close to 1.0 71 | try: 72 | debugmode = 0 73 | if debugmode: 74 | print("calc_potential: self.walk_target_dist") 75 | print(self.walk_target_dist) 76 | print("self.scene.dt") 77 | print(self.scene.dt) 78 | print("self.scene.frame_skip") 79 | print(self.scene.frame_skip) 80 | print("self.scene.timestep") 81 | print(self.scene.timestep) 82 | return - self.walk_target_dist / self.scene.dt 83 | except AttributeError: 84 | return - self.walk_target_dist 85 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/manipulators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/robots/manipulators/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/manipulators/pusher.py: 
-------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot 2 | import numpy as np 3 | 4 | 5 | class Pusher(MJCFBasedRobot): 6 | min_target_placement_radius = 0.5 7 | max_target_placement_radius = 0.8 8 | min_object_to_target_distance = 0.1 9 | max_object_to_target_distance = 0.4 10 | 11 | def __init__(self): 12 | MJCFBasedRobot.__init__(self, 'pusher.xml', 'body0', action_dim=7, obs_dim=55) 13 | 14 | def robot_specific_reset(self, bullet_client): 15 | # parts 16 | self.fingertip = self.parts["tips_arm"] 17 | self.target = self.parts["goal"] 18 | self.object = self.parts["object"] 19 | 20 | # joints 21 | self.shoulder_pan_joint = self.jdict["r_shoulder_pan_joint"] 22 | self.shoulder_lift_joint = self.jdict["r_shoulder_lift_joint"] 23 | self.upper_arm_roll_joint = self.jdict["r_upper_arm_roll_joint"] 24 | self.elbow_flex_joint = self.jdict["r_elbow_flex_joint"] 25 | self.forearm_roll_joint = self.jdict["r_forearm_roll_joint"] 26 | self.wrist_flex_joint = self.jdict["r_wrist_flex_joint"] 27 | self.wrist_roll_joint = self.jdict["r_wrist_roll_joint"] 28 | 29 | self.target_pos = np.concatenate([ 30 | self.np_random.uniform(low=-1, high=1, size=1), 31 | self.np_random.uniform(low=-1, high=1, size=1) 32 | ]) 33 | 34 | # make length of vector between min and max_target_placement_radius 35 | self.target_pos = self.target_pos \ 36 | / np.linalg.norm(self.target_pos) \ 37 | * self.np_random.uniform(low=self.min_target_placement_radius, 38 | high=self.max_target_placement_radius, size=1) 39 | 40 | self.object_pos = np.concatenate([ 41 | self.np_random.uniform(low=-1, high=1, size=1), 42 | self.np_random.uniform(low=-1, high=1, size=1) 43 | ]) 44 | 45 | # make length of vector between min and max_object_to_target_distance 46 | self.object_pos = self.object_pos \ 47 | / np.linalg.norm(self.object_pos - self.target_pos) \ 48 | * 
self.np_random.uniform(low=self.min_object_to_target_distance, 49 | high=self.max_object_to_target_distance, size=1) 50 | 51 | # set position of objects 52 | self.zero_offset = np.array([0.45, 0.55]) 53 | self.jdict["goal_slidex"].reset_current_position(self.target_pos[0] - self.zero_offset[0], 0) 54 | self.jdict["goal_slidey"].reset_current_position(self.target_pos[1] - self.zero_offset[1], 0) 55 | self.jdict["obj_slidex"].reset_current_position(self.object_pos[0] - self.zero_offset[0], 0) 56 | self.jdict["obj_slidey"].reset_current_position(self.object_pos[1] - self.zero_offset[1], 0) 57 | 58 | # randomize all joints TODO: Will this work or do we have to constrain this resetting in some way? 59 | self.shoulder_pan_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0) 60 | self.shoulder_lift_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0) 61 | self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0) 62 | self.elbow_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0) 63 | self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0) 64 | self.wrist_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0) 65 | self.wrist_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0) 66 | 67 | def apply_action(self, a): 68 | assert (np.isfinite(a).all()) 69 | self.shoulder_pan_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1))) 70 | self.shoulder_lift_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1))) 71 | self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[2], -1, +1))) 72 | self.elbow_flex_joint.set_motor_torque(0.05 * float(np.clip(a[3], -1, +1))) 73 | self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[4], -1, +1))) 74 | self.wrist_flex_joint.set_motor_torque(0.05 * float(np.clip(a[5], -1, +1))) 75 | 
self.wrist_roll_joint.set_motor_torque(0.05 * float(np.clip(a[6], -1, +1))) 76 | 77 | def calc_state(self): 78 | qpos = np.array([j.current_position() for j in self.ordered_joints]) # shape (11,) 79 | qvel = np.array([j.current_relative_position() for j in self.ordered_joints]) # shape (11,) 80 | tips_arm_body_com = self.fingertip.pose().xyz() # shape (3,) 81 | object_body_com = self.object.pose().xyz() # shape (3,) 82 | goal_body_com = self.target.pose().xyz() # shape (3,) 83 | 84 | return np.concatenate([ 85 | qpos.flat[:7], # self.sim.data.qpos.flat[:7], 86 | qvel.flat[:7], # self.sim.data.qvel.flat[:7], 87 | tips_arm_body_com, # self.get_body_com("tips_arm"), 88 | object_body_com, # self.get_body_com("object"), 89 | goal_body_com # self.get_body_com("goal"), 90 | ]) 91 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/manipulators/reacher.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot 2 | import numpy as np 3 | 4 | 5 | class Reacher(MJCFBasedRobot): 6 | TARG_LIMIT = 0.27 7 | 8 | def __init__(self): 9 | MJCFBasedRobot.__init__(self, 'reacher.xml', 'body0', action_dim=2, obs_dim=9) 10 | 11 | def robot_specific_reset(self, bullet_client): 12 | self.jdict["target_x"].reset_current_position( 13 | self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT), 0) 14 | self.jdict["target_y"].reset_current_position( 15 | self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT), 0) 16 | self.fingertip = self.parts["fingertip"] 17 | self.target = self.parts["target"] 18 | self.central_joint = self.jdict["joint0"] 19 | self.elbow_joint = self.jdict["joint1"] 20 | self.central_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0) 21 | self.elbow_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0) 22 | 23 | def apply_action(self, a): 24 
| assert (np.isfinite(a).all()) 25 | self.central_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1))) 26 | self.elbow_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1))) 27 | 28 | def calc_state(self): 29 | target_x, target_vx = self.jdict["target_x"].current_position() 30 | target_y, target_vy = self.jdict["target_y"].current_position() 31 | 32 | qpos = np.array([j.current_position() for j in self.ordered_joints]) # shape (4,) 33 | qvel = np.array([j.current_relative_position()[1] for j in self.ordered_joints]) # shape (4,) # TODO: Add target pos and vel 34 | 35 | theta = qpos[:2] 36 | self.to_target_vec = np.array(self.fingertip.pose().xyz()) - np.array(self.target.pose().xyz()) # shape (3,) 37 | 38 | return np.concatenate([ 39 | np.cos(theta), # np.cos(theta), 40 | np.sin(theta), # np.sin(theta), 41 | qpos.flat[2:], # self.sim.data.qpos.flat[2:], 42 | qvel.flat[:2], # self.sim.data.qvel.flat[:2], 43 | self.to_target_vec # self.get_body_com("fingertip") - self.get_body_com("target") 44 | ]) 45 | 46 | def calc_potential(self): 47 | return -100 * np.linalg.norm(self.to_target_vec) -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/pendula/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/robots/pendula/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/pendula/inverted_double_pendulum.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot 2 | import numpy as np 3 | 4 | 5 | class InvertedDoublePendulum(MJCFBasedRobot): 6 | def __init__(self): 7 | MJCFBasedRobot.__init__(self, 'inverted_double_pendulum.xml', 'cart', action_dim=1, obs_dim=11) 8 | 9 | def 
robot_specific_reset(self, bullet_client): 10 | self._p = bullet_client 11 | self.pole2 = self.parts["pole2"] 12 | self.slider = self.jdict["slider"] 13 | self.j1 = self.jdict["hinge"] 14 | self.j2 = self.jdict["hinge2"] 15 | u = self.np_random.uniform(low=-.1, high=.1, size=[2]) 16 | self.j1.reset_current_position(float(u[0]), 0) 17 | self.j2.reset_current_position(float(u[1]), 0) 18 | self.j1.set_motor_torque(0) 19 | self.j2.set_motor_torque(0) 20 | 21 | def apply_action(self, a): 22 | assert( np.isfinite(a).all() ) 23 | self.slider.set_motor_torque( 200*float(np.clip(a[0], -1, +1)) ) 24 | 25 | def calc_state(self): 26 | x, vx = self.slider.current_position() 27 | theta, theta_dot = self.j1.current_position() 28 | gamma, gamma_dot = self.j2.current_position() 29 | 30 | assert(np.isfinite(x)) 31 | 32 | qpos = np.array([x, theta, gamma]) # shape (3,) 33 | qvel = np.array([vx, theta_dot, gamma_dot]) # shape (3,) 34 | qfrc_constraint = np.zeros(3) # shape (3,) # TODO: FIND qfrc_constraint in pybullet 35 | return np.concatenate([ 36 | qpos[:1], # self.sim.data.qpos[:1], # cart x pos 37 | np.sin(qpos[1:]), # np.sin(self.sim.data.qpos[1:]), # link angles 38 | np.cos(qpos[1:]), # np.cos(self.sim.data.qpos[1:]), 39 | np.clip(qvel, -10, 10), # np.clip(self.sim.data.qvel, -10, 10), 40 | np.clip(qfrc_constraint, -10, 10) # np.clip(self.sim.data.qfrc_constraint, -10, 10) 41 | ]).ravel() 42 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/robots/pendula/inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot 2 | import numpy as np 3 | 4 | 5 | class InvertedPendulum(MJCFBasedRobot): 6 | 7 | def __init__(self): 8 | MJCFBasedRobot.__init__(self, 'inverted_pendulum.xml', 'cart', action_dim=1, obs_dim=4) 9 | 10 | def robot_specific_reset(self, bullet_client): 11 | self._p = bullet_client 12 | self.pole = 
self.parts["pole"] 13 | self.slider = self.jdict["slider"] 14 | self.j1 = self.jdict["hinge"] 15 | u = self.np_random.uniform(low=-.1, high=.1) 16 | self.j1.reset_current_position(u, 0) 17 | self.j1.set_motor_torque(0) 18 | 19 | def apply_action(self, a): 20 | assert(np.isfinite(a).all()) 21 | if not np.isfinite(a).all(): 22 | print("a is inf") 23 | a[0] = 0 24 | self.slider.set_motor_torque(100*float(np.clip(a[0], -1, +1))) 25 | 26 | def calc_state(self): 27 | x, vx = self.slider.current_position() 28 | self.theta, theta_dot = self.j1.current_position() 29 | assert(np.isfinite(x)) 30 | 31 | if not np.isfinite(x): 32 | print("x is inf") 33 | x = 0 34 | 35 | if not np.isfinite(vx): 36 | print("vx is inf") 37 | vx = 0 38 | 39 | if not np.isfinite(self.theta): 40 | print("theta is inf") 41 | self.theta = 0 42 | 43 | if not np.isfinite(theta_dot): 44 | print("theta_dot is inf") 45 | theta_dot = 0 46 | 47 | qpos = np.array([x, self.theta]) # shape (2,) 48 | qvel = np.array([vx, theta_dot]) # shape (2,) 49 | 50 | return np.concatenate([ 51 | qpos, # self.sim.data.qpos 52 | qvel]).ravel() # self.sim.data.qvel 53 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/scenes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/scenes/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/scenes/scene_bases.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.append(os.path.dirname(__file__)) 5 | 6 | import gym 7 | 8 | 9 | class Scene: 10 | """A base class for single- and multiplayer scenes""" 11 | 12 | def __init__(self, bullet_client, gravity, timestep, frame_skip): 13 | self._p = bullet_client 14 | self.np_random, seed = 
gym.utils.seeding.np_random(None) 15 | self.timestep = timestep 16 | self.frame_skip = frame_skip 17 | 18 | self.dt = self.timestep * self.frame_skip 19 | self.cpp_world = World(self._p, gravity, timestep, frame_skip) 20 | 21 | self.test_window_still_open = True # or never opened 22 | self.human_render_detected = False # if user wants render("human"), we open test window 23 | 24 | self.multiplayer_robots = {} 25 | 26 | def test_window(self): 27 | "Call this function every frame, to see what's going on. Not necessary in learning." 28 | self.human_render_detected = True 29 | return self.test_window_still_open 30 | 31 | def actor_introduce(self, robot): 32 | "Usually after scene reset" 33 | if not self.multiplayer: return 34 | self.multiplayer_robots[robot.player_n] = robot 35 | 36 | def actor_is_active(self, robot): 37 | """ 38 | Used by robots to see if they are free to exclusiveley put their HUD on the test window. 39 | Later can be used for click-focus robots. 40 | """ 41 | return not self.multiplayer 42 | 43 | def episode_restart(self, bullet_client): 44 | "This function gets overridden by specific scene, to reset specific objects into their start positions" 45 | self.cpp_world.clean_everything() 46 | #self.cpp_world.test_window_history_reset() 47 | 48 | def global_step(self): 49 | """ 50 | The idea is: apply motor torques for all robots, then call global_step(), then collect 51 | observations from robots using step() with the same action. 
52 | """ 53 | self.cpp_world.step(self.frame_skip) 54 | 55 | 56 | class SingleRobotEmptyScene(Scene): 57 | multiplayer = False # this class is used "as is" for InvertedPendulum, Reacher 58 | 59 | 60 | class World: 61 | 62 | def __init__(self, bullet_client, gravity, timestep, frame_skip): 63 | self._p = bullet_client 64 | self.gravity = gravity 65 | self.timestep = timestep 66 | self.frame_skip = frame_skip 67 | self.numSolverIterations = 5 68 | self.clean_everything() 69 | 70 | def clean_everything(self): 71 | # p.resetSimulation() 72 | self._p.setGravity(0, 0, -self.gravity) 73 | self._p.setDefaultContactERP(0.9) 74 | # print("self.numSolverIterations=",self.numSolverIterations) 75 | self._p.setPhysicsEngineParameter(fixedTimeStep=self.timestep*self.frame_skip, numSolverIterations=self.numSolverIterations, numSubSteps=self.frame_skip) 76 | 77 | def step(self, frame_skip): 78 | self._p.stepSimulation() 79 | 80 | 81 | -------------------------------------------------------------------------------- /pybulletgym/envs/mujoco/scenes/stadium.py: -------------------------------------------------------------------------------- 1 | import os, inspect 2 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 3 | parentdir = os.path.dirname(currentdir) 4 | os.sys.path.insert(0,parentdir) 5 | 6 | from .scene_bases import Scene 7 | import pybullet 8 | 9 | 10 | class StadiumScene(Scene): 11 | multiplayer = False 12 | zero_at_running_strip_start_line = True # if False, center of coordinates (0,0,0) will be at the middle of the stadium 13 | stadium_halflen = 105*0.25 # FOOBALL_FIELD_HALFLEN 14 | stadium_halfwidth = 50*0.25 # FOOBALL_FIELD_HALFWID 15 | stadiumLoaded = 0 16 | 17 | def episode_restart(self, bullet_client): 18 | self._p = bullet_client 19 | Scene.episode_restart(self, bullet_client) 20 | if self.stadiumLoaded == 0: 21 | self.stadiumLoaded = 1 22 | 23 | # stadium_pose = cpp_household.Pose() 24 | # if 
self.zero_at_running_strip_start_line: 25 | # stadium_pose.set_xyz(27, 21, 0) # see RUN_STARTLINE, RUN_RAD constants 26 | 27 | filename = os.path.join(os.path.dirname(__file__), "..", "assets", "scenes", "stadium", "plane_stadium.sdf") 28 | self.ground_plane_mjcf=self._p.loadSDF(filename) 29 | #filename = os.path.join(pybullet_data.getDataPath(),"stadium_no_collision.sdf") 30 | #self.ground_plane_mjcf = p.loadSDF(filename) 31 | # 32 | for i in self.ground_plane_mjcf: 33 | self._p.changeDynamics(i,-1,lateralFriction=0.8, restitution=0.5) 34 | self._p.changeVisualShape(i,-1,rgbaColor=[1,1,1,0.8]) 35 | self._p.configureDebugVisualizer(pybullet.COV_ENABLE_PLANAR_REFLECTION,1) 36 | 37 | # for j in range(pybullet.getNumJoints(i)): 38 | # self._p.changeDynamics(i,j,lateralFriction=0) 39 | #despite the name (stadium_no_collision), it DID have collision, so don't add duplicate ground -------------------------------------------------------------------------------- /pybulletgym/envs/roboschool/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/roboschool/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/roboschool/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/roboschool/envs/__init__.py -------------------------------------------------------------------------------- /pybulletgym/envs/roboschool/envs/env_bases.py: -------------------------------------------------------------------------------- 1 | import gym, gym.spaces, gym.utils, gym.utils.seeding 2 | import numpy as np 3 | import pybullet 4 | from pybullet_utils import bullet_client 5 | 6 | from pkg_resources import parse_version 7 | 8 | 9 | class 
BaseBulletEnv(gym.Env): 10 | """ 11 | Base class for Bullet physics simulation environments in a Scene. 12 | These environments create single-player scenes and behave like normal Gym environments, if 13 | you don't use multiplayer. 14 | """ 15 | 16 | metadata = { 17 | 'render.modes': ['human', 'rgb_array'], 18 | 'video.frames_per_second': 60 19 | } 20 | 21 | def __init__(self, robot, render=False): 22 | self.scene = None 23 | self.physicsClientId = -1 24 | self.ownsPhysicsClient = 0 25 | self.camera = Camera() 26 | self.isRender = render 27 | self.robot = robot 28 | self._seed() 29 | self._cam_dist = 3 30 | self._cam_yaw = 0 31 | self._cam_pitch = -30 32 | self._render_width = 320 33 | self._render_height = 240 34 | 35 | self.action_space = robot.action_space 36 | self.observation_space = robot.observation_space 37 | 38 | def configure(self, args): 39 | self.robot.args = args 40 | 41 | def _seed(self, seed=None): 42 | self.np_random, seed = gym.utils.seeding.np_random(seed) 43 | self.robot.np_random = self.np_random # use the same np_randomizer for robot as for env 44 | return [seed] 45 | 46 | def _reset(self): 47 | if self.physicsClientId < 0: 48 | self.ownsPhysicsClient = True 49 | 50 | if self.isRender: 51 | self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI) 52 | else: 53 | self._p = bullet_client.BulletClient() 54 | 55 | self.physicsClientId = self._p._client 56 | self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI,0) 57 | 58 | if self.scene is None: 59 | self.scene = self.create_single_player_scene(self._p) 60 | if not self.scene.multiplayer and self.ownsPhysicsClient: 61 | self.scene.episode_restart(self._p) 62 | 63 | self.robot.scene = self.scene 64 | 65 | self.frame = 0 66 | self.done = 0 67 | self.reward = 0 68 | dump = 0 69 | s = self.robot.reset(self._p) 70 | self.potential = self.robot.calc_potential() 71 | return s 72 | 73 | def _render(self, mode, close=False): 74 | if mode == "human": 75 | self.isRender = True 76 | if mode != 
"rgb_array": 77 | return np.array([]) 78 | 79 | base_pos = [0,0,0] 80 | if hasattr(self,'robot'): 81 | if hasattr(self.robot,'body_xyz'): 82 | base_pos = self.robot.body_xyz 83 | 84 | view_matrix = self._p.computeViewMatrixFromYawPitchRoll( 85 | cameraTargetPosition=base_pos, 86 | distance=self._cam_dist, 87 | yaw=self._cam_yaw, 88 | pitch=self._cam_pitch, 89 | roll=0, 90 | upAxisIndex=2) 91 | proj_matrix = self._p.computeProjectionMatrixFOV( 92 | fov=60, aspect=float(self._render_width)/self._render_height, 93 | nearVal=0.1, farVal=100.0) 94 | (_, _, px, _, _) = self._p.getCameraImage( 95 | width=self._render_width, height=self._render_height, viewMatrix=view_matrix, 96 | projectionMatrix=proj_matrix, 97 | renderer=pybullet.ER_BULLET_HARDWARE_OPENGL 98 | ) 99 | rgb_array = np.array(px) 100 | rgb_array = rgb_array[:, :, :3] 101 | return rgb_array 102 | 103 | def _close(self): 104 | if self.ownsPhysicsClient: 105 | if self.physicsClientId >= 0: 106 | self._p.disconnect() 107 | self.physicsClientId = -1 108 | 109 | def HUD(self, state, a, done): 110 | pass 111 | 112 | # backwards compatibility for gym >= v0.9.x 113 | # for extension of this class. 
class Camera:
    """Thin handle for pointing the PyBullet debug-visualizer camera.

    NOTE(review): ``move_and_look_at`` reads ``self._p``, which this class
    never assigns -- the owning environment is expected to attach a bullet
    client to the instance before the method is called; verify against callers.
    """

    def __init__(self):
        # No state of its own; the bullet client is injected externally.
        pass

    def move_and_look_at(self, i, j, k, x, y, z):
        """Aim the debug camera at ``(x, y, z)`` from a fixed viewpoint.

        The ``(i, j, k)`` arguments are accepted for interface compatibility
        but are currently unused.
        """
        target = [x, y, z]
        # Hard-coded viewpoint: distance 10, yaw 10, pitch -20.
        self._p.resetDebugVisualizerCamera(10, 10, -20, target)
class HalfCheetahBulletEnv(WalkerBaseBulletEnv):
    """Locomotion environment wrapping the HalfCheetah robot in the walker base env."""

    def __init__(self):
        # Build the robot first; the base initializer copies its
        # action/observation spaces onto the environment.
        robot = HalfCheetah()
        self.robot = robot
        WalkerBaseBulletEnv.__init__(self, robot)
class HumanoidFlagrunBulletEnv(HumanoidBulletEnv):
    """Humanoid locomotion towards randomly re-placed flags."""

    random_yaw = True

    def __init__(self):
        self.robot = HumanoidFlagrun()
        HumanoidBulletEnv.__init__(self, self.robot)

    def create_single_player_scene(self, bullet_client):
        s = HumanoidBulletEnv.create_single_player_scene(self, bullet_client)
        # Flags spawn anywhere, so the stadium start line must not be the origin.
        s.zero_at_running_strip_start_line = False
        return s


class HumanoidFlagrunHarderBulletEnv(HumanoidBulletEnv):
    """Harder flagrun variant: the robot may start leaning/fallen over."""

    random_lean = True  # can fall on start

    def __init__(self):
        self.robot = HumanoidFlagrunHarder()
        HumanoidBulletEnv.__init__(self, self.robot)
        # BUGFIX: this adjustment used to run *before* the base __init__, which
        # then overwrote self.electricity_cost (HumanoidBulletEnv.__init__ sets
        # self.electricity_cost = 4.25 * WalkerBaseBulletEnv.electricity_cost),
        # silently discarding the division. It must come after the base init.
        self.electricity_cost /= 4  # don't care that much about electricity, just stand up!

    def create_single_player_scene(self, bullet_client):
        s = HumanoidBulletEnv.create_single_player_scene(self, bullet_client)
        # Flags spawn anywhere, so the stadium start line must not be the origin.
        s.zero_at_running_strip_start_line = False
        return s
step(self, a): 16 | self.robot.apply_action(a) 17 | self.scene.global_step() 18 | 19 | state = self.robot.calc_state() # sets self.to_target_vec 20 | 21 | potential_old = self.potential 22 | self.potential = self.robot.calc_potential() 23 | 24 | joint_vel = np.array([ 25 | self.robot.shoulder_pan_joint.get_velocity(), 26 | self.robot.shoulder_lift_joint.get_velocity(), 27 | self.robot.upper_arm_roll_joint.get_velocity(), 28 | self.robot.elbow_flex_joint.get_velocity(), 29 | self.robot.forearm_roll_joint.get_velocity(), 30 | self.robot.wrist_flex_joint.get_velocity(), 31 | self.robot.wrist_roll_joint.get_velocity() 32 | ]) 33 | 34 | action_product = np.matmul(np.abs(a), np.abs(joint_vel)) 35 | action_sum = np.sum(a) 36 | 37 | electricity_cost = ( 38 | -0.10 * action_product # work torque*angular_velocity 39 | - 0.01 * action_sum # stall torque require some energy 40 | ) 41 | 42 | stuck_joint_cost = 0 43 | for j in self.robot.ordered_joints: 44 | if np.abs(j.current_relative_position()[0]) - 1 < 0.01: 45 | stuck_joint_cost += -0.1 46 | 47 | self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost)] 48 | self.HUD(state, a, False) 49 | return state, sum(self.rewards), False, {} 50 | 51 | def calc_potential(self): 52 | return -100 * np.linalg.norm(self.to_target_vec) 53 | 54 | def camera_adjust(self): 55 | x, y, z = self.robot.fingertip.pose().xyz() 56 | x *= 0.5 57 | y *= 0.5 58 | self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z) 59 | -------------------------------------------------------------------------------- /pybulletgym/envs/roboschool/envs/manipulation/reacher_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv 2 | from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene 3 | import numpy as np 4 | from pybulletgym.envs.roboschool.robots.manipulators.reacher import Reacher 5 | 6 | 7 
class ReacherBulletEnv(BaseBulletEnv):
    """Two-joint reacher: move the fingertip to a target in a gravity-free scene."""

    def __init__(self):
        self.robot = Reacher()
        BaseBulletEnv.__init__(self, self.robot)

    def create_single_player_scene(self, bullet_client):
        # Zero gravity: the arm moves in a horizontal plane.
        return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0165, frame_skip=1)

    def step(self, a):
        assert not self.scene.multiplayer
        self.robot.apply_action(a)
        self.scene.global_step()

        state = self.robot.calc_state()  # sets self.to_target_vec on the robot

        prev_potential = self.potential
        self.potential = self.robot.calc_potential()
        progress = float(self.potential - prev_potential)

        # Work term (torque * angular velocity) plus a small stall-torque term.
        work_cost = np.abs(a[0] * self.robot.theta_dot) + np.abs(a[1] * self.robot.gamma_dot)
        stall_cost = np.abs(a[0]) + np.abs(a[1])
        electricity_cost = -0.10 * work_cost - 0.01 * stall_cost

        # Penalize the elbow joint sitting at its limit.
        at_limit = np.abs(np.abs(self.robot.gamma) - 1) < 0.01
        stuck_joint_cost = -0.1 if at_limit else 0.0

        self.rewards = [progress, float(electricity_cost), float(stuck_joint_cost)]
        self.HUD(state, a, False)
        return state, sum(self.rewards), False, {}

    def camera_adjust(self):
        x, y, z = self.robot.fingertip.pose().xyz()
        # Track a point halfway between origin and fingertip (in x/y).
        self.camera.move_and_look_at(0.3, 0.3, 0.3, x * 0.5, y * 0.5, z)
0.1 14 | 15 | def create_single_player_scene(self, bullet_client): 16 | return SingleRobotEmptyScene(bullet_client, gravity=9.81, timestep=0.0020, frame_skip=5) 17 | 18 | def step(self, a): 19 | self.robot.apply_action(a) 20 | self.scene.global_step() 21 | 22 | state = self.robot.calc_state() # sets self.to_target_vec 23 | 24 | potential_old = self.potential 25 | self.potential = self.robot.calc_potential() 26 | 27 | joint_vel = np.array([ 28 | self.robot.shoulder_pan_joint.get_velocity(), 29 | self.robot.shoulder_lift_joint.get_velocity(), 30 | self.robot.upper_arm_roll_joint.get_velocity(), 31 | self.robot.elbow_flex_joint.get_velocity(), 32 | self.robot.forearm_roll_joint.get_velocity(), 33 | self.robot.wrist_flex_joint.get_velocity(), 34 | self.robot.wrist_roll_joint.get_velocity() 35 | ]) 36 | 37 | action_product = np.matmul(np.abs(a), np.abs(joint_vel)) 38 | action_sum = np.sum(a) 39 | 40 | electricity_cost = ( 41 | -0.10 * action_product # work torque*angular_velocity 42 | - 0.01 * action_sum # stall torque require some energy 43 | ) 44 | 45 | stuck_joint_cost = 0 46 | for j in self.robot.ordered_joints: 47 | if np.abs(j.current_relative_position()[0]) - 1 < 0.01: 48 | stuck_joint_cost += -0.1 49 | 50 | dist_object_finger = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz() 51 | reward_dist_vec = self.robot.object.pose().xyz() - self.robot.target.pose().xyz() # TODO: Should the object and target really belong to the robot? 
Maybe split this off 52 | 53 | self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(reward_dist_vec)) 54 | 55 | if np.linalg.norm(dist_object_finger) < self.strike_threshold: 56 | self._striked = True 57 | self._strike_pos = self.robot.fingertip.pose().xyz() 58 | 59 | if self._striked: 60 | reward_near_vec = self.robot.object.pose().xyz() - self._strike_pos 61 | else: 62 | reward_near_vec = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz() 63 | 64 | reward_near = - np.linalg.norm(reward_near_vec) 65 | 66 | reward_dist = - np.linalg.norm(self._min_strike_dist) 67 | reward_ctrl = - np.square(a).sum() 68 | self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost), 69 | 3 * reward_dist, 0.1 * reward_ctrl, 0.5 * reward_near] 70 | self.HUD(state, a, False) 71 | return state, sum(self.rewards), False, {} 72 | 73 | def calc_potential(self): 74 | return -100 * np.linalg.norm(self.to_target_vec) 75 | 76 | def camera_adjust(self): 77 | x, y, z = self.robot.fingertip.pose().xyz() 78 | x *= 0.5 79 | y *= 0.5 80 | self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z) 81 | -------------------------------------------------------------------------------- /pybulletgym/envs/roboschool/envs/manipulation/thrower_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv 2 | from pybulletgym.envs.roboschool.robots.manipulators.thrower import Thrower 3 | from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene 4 | import numpy as np 5 | 6 | 7 | class ThrowerBulletEnv(BaseBulletEnv): 8 | def __init__(self): 9 | self.robot = Thrower() 10 | BaseBulletEnv.__init__(self, self.robot) 11 | 12 | def create_single_player_scene(self, bullet_client): 13 | return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0020, frame_skip=5) 14 | 15 | def step(self, a): 16 | 
self.robot.apply_action(a) 17 | self.scene.global_step() 18 | state = self.robot.calc_state() # sets self.to_target_vec 19 | 20 | potential_old = self.potential 21 | self.potential = self.robot.calc_potential() 22 | 23 | joint_vel = np.array([ 24 | self.robot.shoulder_pan_joint.get_velocity(), 25 | self.robot.shoulder_lift_joint.get_velocity(), 26 | self.robot.upper_arm_roll_joint.get_velocity(), 27 | self.robot.elbow_flex_joint.get_velocity(), 28 | self.robot.forearm_roll_joint.get_velocity(), 29 | self.robot.wrist_flex_joint.get_velocity(), 30 | self.robot.wrist_roll_joint.get_velocity() 31 | ]) 32 | 33 | action_product = np.matmul(np.abs(a), np.abs(joint_vel)) 34 | action_sum = np.sum(a) 35 | 36 | electricity_cost = ( 37 | -0.10 * action_product # work torque*angular_velocity 38 | - 0.01 * action_sum # stall torque require some energy 39 | ) 40 | 41 | stuck_joint_cost = 0 42 | for j in self.robot.ordered_joints: 43 | if np.abs(j.current_relative_position()[0]) - 1 < 0.01: 44 | stuck_joint_cost += -0.1 45 | 46 | object_xy = self.robot.object.pose().xyz()[:2] 47 | target_xy = self.robot.target.pose().xyz()[:2] 48 | 49 | if not self.robot._object_hit_ground and self.robot.object.pose().xyz()[2] < -0.25: # TODO: Should the object and target really belong to the robot? 
Maybe split this off 50 | self.robot._object_hit_ground = True 51 | self.robot._object_hit_location = self.robot.object.pose().xyz() 52 | 53 | if self.robot._object_hit_ground: 54 | object_hit_xy = self.robot._object_hit_location[:2] 55 | reward_dist = -np.linalg.norm(object_hit_xy - target_xy) 56 | else: 57 | reward_dist = -np.linalg.norm(object_xy - target_xy) 58 | reward_ctrl = - np.square(a).sum() 59 | 60 | self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost), 61 | reward_dist, 0.002 * reward_ctrl] 62 | self.HUD(state, a, False) 63 | return state, sum(self.rewards), False, {} 64 | 65 | def camera_adjust(self): 66 | x, y, z = self.robot.fingertip.pose().xyz() 67 | x *= 0.5 68 | y *= 0.5 69 | self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z) 70 | 71 | -------------------------------------------------------------------------------- /pybulletgym/envs/roboschool/envs/pendulum/__init__.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.roboschool.envs.pendulum.inverted_pendulum_env import InvertedPendulumBulletEnv, InvertedPendulumSwingupBulletEnv 2 | -------------------------------------------------------------------------------- /pybulletgym/envs/roboschool/envs/pendulum/inverted_double_pendulum_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv 2 | from pybulletgym.envs.roboschool.robots.pendula.inverted_double_pendulum import InvertedDoublePendulum 3 | from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene 4 | 5 | 6 | class InvertedDoublePendulumBulletEnv(BaseBulletEnv): 7 | def __init__(self): 8 | self.robot = InvertedDoublePendulum() 9 | BaseBulletEnv.__init__(self, self.robot) 10 | self.stateId = -1 11 | 12 | def create_single_player_scene(self, bullet_client): 13 | return SingleRobotEmptyScene(bullet_client, 
gravity=9.8, timestep=0.0165, frame_skip=1) 14 | 15 | def reset(self): 16 | if self.stateId >= 0: 17 | self._p.restoreState(self.stateId) 18 | r = BaseBulletEnv._reset(self) 19 | if self.stateId < 0: 20 | self.stateId = self._p.saveState() 21 | return r 22 | 23 | def step(self, a): 24 | self.robot.apply_action(a) 25 | self.scene.global_step() 26 | state = self.robot.calc_state() # sets self.pos_x self.pos_y 27 | # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9 28 | # using tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3 29 | dist_penalty = 0.01 * self.robot.pos_x ** 2 + (self.robot.pos_y + 0.3 - 2) ** 2 30 | # v1, v2 = self.model.data.qvel[1:3] TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040 31 | # vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 32 | vel_penalty = 0 33 | alive_bonus = 10 34 | done = self.robot.pos_y + 0.3 <= 1 35 | self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)] 36 | self.HUD(state, a, done) 37 | return state, sum(self.rewards), done, {} 38 | 39 | def camera_adjust(self): 40 | self.camera.move_and_look_at(0,1.2,1.2, 0,0,0.5) 41 | -------------------------------------------------------------------------------- /pybulletgym/envs/roboschool/envs/pendulum/inverted_pendulum_env.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv 2 | from pybulletgym.envs.roboschool.robots.pendula.interted_pendulum import InvertedPendulum, InvertedPendulumSwingup 3 | from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene 4 | import numpy as np 5 | 6 | 7 | class InvertedPendulumBulletEnv(BaseBulletEnv): 8 | def __init__(self): 9 | self.robot = InvertedPendulum() 10 | BaseBulletEnv.__init__(self, self.robot) 11 | self.stateId = -1 12 | 13 | def create_single_player_scene(self, bullet_client): 14 | return 
class InvertedPendulumSwingupBulletEnv(InvertedPendulumBulletEnv):
    """Swing-up variant: same env machinery, but the pendulum starts hanging down."""

    def __init__(self):
        # Swap in the swing-up robot; skip InvertedPendulumBulletEnv.__init__
        # (it would build the non-swingup robot) and initialize the base directly.
        self.robot = InvertedPendulumSwingup()
        BaseBulletEnv.__init__(self, self.robot)
        self.stateId = -1  # no saved bullet state yet; set on first reset
class Ant(WalkerBase, MJCFBasedRobot):
    """Quadruped ant robot loaded from the MuJoCo XML description."""

    # Contacts of these links with the ground are tracked by WalkerBase.
    foot_list = ['front_left_foot', 'front_right_foot', 'left_back_foot', 'right_back_foot']

    def __init__(self):
        WalkerBase.__init__(self, power=2.5)
        MJCFBasedRobot.__init__(self, "ant.xml", "torso", action_dim=8, obs_dim=28)

    def alive_bonus(self, z, pitch):
        # 0.25 is the central sphere radius: die if the torso scrapes the ground.
        if z > 0.26:
            return +1
        return -1
debug code to fix unwanted self-collisions: 17 | #for part in self.parts.values(): 18 | # contact_names = set(x.name for x in part.contact_list()) 19 | # if contact_names: 20 | # print("CONTACT OF '%s' WITH '%s'" % (part.name, ",".join(contact_names)) ) 21 | 22 | x, y, z = self.head.pose().xyz() 23 | # Failure mode: robot doesn't bend knees, tries to walk using hips. 24 | # We fix that by a bit of reward engineering. 25 | knees = np.array([j.current_relative_position() for j in [self.jdict["l_leg_kny"], self.jdict["r_leg_kny"]]], dtype=np.float32).flatten() 26 | knees_at_limit = np.count_nonzero(np.abs(knees[0::2]) > 0.99) 27 | return +4-knees_at_limit if z > 1.3 else -1 28 | 29 | def robot_specific_reset(self, bullet_client): 30 | WalkerBase.robot_specific_reset(self, bullet_client) 31 | self.set_initial_orientation(yaw_center=0, yaw_random_spread=np.pi) 32 | self.head = self.parts["head"] 33 | 34 | def set_initial_orientation(self, yaw_center, yaw_random_spread): 35 | if not self.random_yaw: 36 | yaw = yaw_center 37 | else: 38 | yaw = yaw_center + self.np_random.uniform(low=-yaw_random_spread, high=yaw_random_spread) 39 | 40 | position = [self.start_pos_x, self.start_pos_y, self.start_pos_z + 1.0] 41 | orientation = [0, 0, yaw] # just face random direction, but stay straight otherwise 42 | self.robot_body.reset_pose(position, p.getQuaternionFromEuler(orientation)) 43 | self.initial_z = 1.5 44 | -------------------------------------------------------------------------------- /pybulletgym/envs/roboschool/robots/locomotors/half_cheetah.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.roboschool.robots.locomotors.walker_base import WalkerBase 2 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot 3 | import numpy as np 4 | 5 | 6 | class HalfCheetah(WalkerBase, MJCFBasedRobot): 7 | foot_list = ["ffoot", "fshin", "fthigh", "bfoot", "bshin", "bthigh"] # track these contacts with ground 
8 | 9 | def __init__(self): 10 | WalkerBase.__init__(self, power=0.90) 11 | MJCFBasedRobot.__init__(self, "half_cheetah.xml", "torso", action_dim=6, obs_dim=26) 12 | 13 | def alive_bonus(self, z, pitch): 14 | # Use contact other than feet to terminate episode: due to a lot of strange walks using knees 15 | return +1 if np.abs(pitch) < 1.0 and not self.feet_contact[1] and not self.feet_contact[2] and not self.feet_contact[4] and not self.feet_contact[5] else -1 16 | 17 | def robot_specific_reset(self, bullet_client): 18 | WalkerBase.robot_specific_reset(self, bullet_client) 19 | self.jdict["bthigh"].power_coef = 120.0 20 | self.jdict["bshin"].power_coef = 90.0 21 | self.jdict["bfoot"].power_coef = 60.0 22 | self.jdict["fthigh"].power_coef = 140.0 23 | self.jdict["fshin"].power_coef = 60.0 24 | self.jdict["ffoot"].power_coef = 30.0 25 | -------------------------------------------------------------------------------- /pybulletgym/envs/roboschool/robots/locomotors/hopper.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.roboschool.robots.locomotors.walker_base import WalkerBase 2 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot 3 | 4 | 5 | class Hopper(WalkerBase, MJCFBasedRobot): 6 | foot_list = ["foot"] 7 | 8 | def __init__(self): 9 | WalkerBase.__init__(self, power=0.75) 10 | MJCFBasedRobot.__init__(self, "hopper.xml", "torso", action_dim=3, obs_dim=15) 11 | 12 | def alive_bonus(self, z, pitch): 13 | return +1 if z > 0.8 and abs(pitch) < 1.0 else -1 14 | -------------------------------------------------------------------------------- /pybulletgym/envs/roboschool/robots/locomotors/humanoid.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.roboschool.robots.locomotors.walker_base import WalkerBase 2 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot 3 | import numpy as np 4 | 5 | 6 | class 
class Humanoid(WalkerBase, MJCFBasedRobot):
    """Roboschool-style humanoid walker loaded from an MJCF model.

    17 torque-controlled joints, torso as the root body; observation/action
    dimensions (44/17) are fixed by the MJCF model.
    """
    self_collision = True
    # Only the feet count as ground-contact bodies. ("left_hand", "right_hand"
    # were deliberately left out of the contact list.)
    foot_list = ["right_foot", "left_foot"]  # "left_hand", "right_hand"

    def __init__(self, random_yaw=False, random_lean=False):
        """random_yaw: face a random direction at reset.
        random_lean: additionally sometimes start lying down (only takes
        effect when random_yaw is also True — see robot_specific_reset)."""
        WalkerBase.__init__(self, power=0.41)
        MJCFBasedRobot.__init__(self, 'humanoid_symmetric.xml', 'torso', action_dim=17, obs_dim=44)
        # 17 joints, 4 of them important for walking (hip, knee), others may as well be turned off, 17/4 = 4.25
        self.random_yaw = random_yaw
        self.random_lean = random_lean

    def robot_specific_reset(self, bullet_client):
        """Build the motor/torque tables and optionally randomize the start pose."""
        WalkerBase.robot_specific_reset(self, bullet_client)
        # Per-motor torque scales; order must match motor_names pairwise.
        self.motor_names = ["abdomen_z", "abdomen_y", "abdomen_x"]
        self.motor_power = [100, 100, 100]
        self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"]
        self.motor_power += [100, 100, 300, 200]
        self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"]
        self.motor_power += [100, 100, 300, 200]
        self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"]
        self.motor_power += [75, 75, 75]
        self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"]
        self.motor_power += [75, 75, 75]
        self.motors = [self.jdict[n] for n in self.motor_names]
        # NOTE: pose randomization (including random_lean) only happens when
        # random_yaw is enabled — the lean check is nested inside this branch.
        if self.random_yaw:
            position = [0, 0, 0]
            orientation = [0, 0, 0]
            yaw = self.np_random.uniform(low=-3.14, high=3.14)
            if self.random_lean and self.np_random.randint(2) == 0:
                # 50% of lean resets start face-down, 50% face-up, at a
                # correspondingly lower torso height.
                if self.np_random.randint(2) == 0:
                    pitch = np.pi/2
                    position = [0, 0, 0.45]
                else:
                    pitch = np.pi*3/2
                    position = [0, 0, 0.25]
                roll = 0
                orientation = [roll, pitch, yaw]
            else:
                position = [0, 0, 1.4]
                orientation = [0, 0, yaw]  # just face random direction, but stay straight otherwise
            self.robot_body.reset_position(position)
            # `p` is presumably the module-level pybullet import of this file
            # (not visible in this chunk) — TODO confirm.
            self.robot_body.reset_orientation(p.getQuaternionFromEuler(orientation))
        self.initial_z = 0.8

    def apply_action(self, a):
        """Apply clipped torques; a is a length-17 action in [-1, 1] per joint."""
        assert(np.isfinite(a).all())
        force_gain = 1
        # zip(range(17), ...) also caps the loop at 17 motors.
        for i, m, power in zip(range(17), self.motors, self.motor_power):
            m.set_motor_torque(float(force_gain * power * self.power * np.clip(a[i], -1, +1)))

    def alive_bonus(self, z, pitch):
        return +2 if z > 0.78 else -1  # 2 here because 17 joints produce a lot of electricity cost just from policy noise, living must be better than dying


# ====== pybulletgym/envs/roboschool/robots/locomotors/walker2d.py ======
from pybulletgym.envs.roboschool.robots.locomotors.walker_base import WalkerBase
from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot


class Walker2D(WalkerBase, MJCFBasedRobot):
    """Planar two-legged walker (MJCF walker2d model), 6 actuated joints."""
    foot_list = ["foot", "foot_left"]

    def __init__(self):
        WalkerBase.__init__(self, power=0.40)
        MJCFBasedRobot.__init__(self, "walker2d.xml", "torso", action_dim=6, obs_dim=22)

    def alive_bonus(self, z, pitch):
        # Alive while the torso is high enough and not pitched over.
        return +1 if z > 0.8 and abs(pitch) < 1.0 else -1

    def robot_specific_reset(self, bullet_client):
        WalkerBase.robot_specific_reset(self, bullet_client)
        # Boost the ankle joints' torque scale relative to the default.
        for n in ["foot_joint", "foot_left_joint"]:
            self.jdict[n].power_coef = 30.0

# ====== pybulletgym/envs/roboschool/robots/locomotors/walker_base.py continues in the next chunk ======
# ====== pybulletgym/envs/roboschool/robots/locomotors/walker_base.py ======
from pybulletgym.envs.roboschool.robots.robot_bases import XmlBasedRobot
import numpy as np


class WalkerBase(XmlBasedRobot):
    """Shared base for the legged locomotors: joint reset, torque control,
    observation assembly and the walk-to-target potential."""

    def __init__(self, power):
        # power: global torque scale multiplied into every motor command.
        self.power = power
        self.camera_x = 0
        self.start_pos_x, self.start_pos_y, self.start_pos_z = 0, 0, 0
        self.walk_target_x = 1e3  # kilometer away
        self.walk_target_y = 0
        self.body_xyz = [0, 0, 0]

    def robot_specific_reset(self, bullet_client):
        """Jitter every joint slightly and cache foot links for contact sensing."""
        self._p = bullet_client
        for j in self.ordered_joints:
            j.reset_current_position(self.np_random.uniform(low=-0.1, high=0.1), 0)

        self.feet = [self.parts[f] for f in self.foot_list]
        self.feet_contact = np.array([0.0 for f in self.foot_list], dtype=np.float32)
        # Scene may not support actor registration (single-robot scenes).
        try:
            self.scene.actor_introduce(self)
        except AttributeError:
            pass
        # Lazily captured on the first calc_state() call.
        self.initial_z = None

    def apply_action(self, a):
        """Apply one clipped torque per ordered joint; a[i] is in [-1, 1]."""
        assert (np.isfinite(a).all())
        for n, j in enumerate(self.ordered_joints):
            j.set_motor_torque(self.power * j.power_coef * float(np.clip(a[n], -1, +1)))

    def calc_state(self):
        """Assemble the observation: 8 body features, 2 values per joint,
        then one contact flag per foot; everything clipped to [-5, 5]."""
        j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten()
        # even elements [0::2] position, scaled to -1..+1 between limits
        # odd elements [1::2] angular speed, scaled to show -1..+1
        self.joint_speeds = j[1::2]
        self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99)

        body_pose = self.robot_body.pose()
        parts_xyz = np.array([p.pose().xyz() for p in self.parts.values()]).flatten()
        self.body_xyz = (
            parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2])  # torso z is more informative than mean z
        self.body_rpy = body_pose.rpy()
        z = self.body_xyz[2]
        if self.initial_z is None:
            self.initial_z = z
        r, p, yaw = self.body_rpy
        self.walk_target_theta = np.arctan2(self.walk_target_y - self.body_xyz[1],
                                            self.walk_target_x - self.body_xyz[0])
        self.walk_target_dist = np.linalg.norm(
            [self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0]])
        angle_to_target = self.walk_target_theta - yaw

        # Rotate world-frame velocity by -yaw into the body's heading frame.
        rot_speed = np.array(
            [[np.cos(-yaw), -np.sin(-yaw), 0],
             [np.sin(-yaw), np.cos(-yaw), 0],
             [ 0, 0, 1]]
        )
        vx, vy, vz = np.dot(rot_speed, self.robot_body.speed())  # rotate speed back to body point of view

        more = np.array([z-self.initial_z,
                         np.sin(angle_to_target), np.cos(angle_to_target),
                         0.3 * vx, 0.3 * vy, 0.3 * vz,  # 0.3 is just scaling typical speed into -1..+1, no physical sense here
                         r, p], dtype=np.float32)
        return np.clip(np.concatenate([more] + [j] + [self.feet_contact]), -5, +5)

    def calc_potential(self):
        # progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second),
        # all rewards have rew/frame units and close to 1.0
        # Falls back to the raw distance when the scene (and thus dt) is absent.
        try:
            debugmode = 0
            if debugmode:
                print("calc_potential: self.walk_target_dist")
                print(self.walk_target_dist)
                print("self.scene.dt")
                print(self.scene.dt)
                print("self.scene.frame_skip")
                print(self.scene.frame_skip)
                print("self.scene.timestep")
                print(self.scene.timestep)
            return - self.walk_target_dist / self.scene.dt
        except AttributeError:
            return - self.walk_target_dist

# ====== pybulletgym/envs/roboschool/robots/manipulators/pusher.py continues in the next chunk ======
# ====== pybulletgym/envs/roboschool/robots/manipulators/pusher.py ======
from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
import numpy as np


class Pusher(MJCFBasedRobot):
    """7-DoF arm that pushes an object toward a goal (MJCF pusher model)."""
    # Sampling bounds (meters) for the goal position and the object's
    # distance to it at reset.
    min_target_placement_radius = 0.5
    max_target_placement_radius = 0.8
    min_object_to_target_distance = 0.1
    max_object_to_target_distance = 0.4

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'pusher.xml', 'body0', action_dim=7, obs_dim=55)

    def robot_specific_reset(self, bullet_client):
        """Cache parts/joints, sample goal and object positions, randomize the arm."""
        # parts
        self.fingertip = self.parts["tips_arm"]
        self.target = self.parts["goal"]
        self.object = self.parts["object"]

        # joints
        self.shoulder_pan_joint = self.jdict["r_shoulder_pan_joint"]
        self.shoulder_lift_joint = self.jdict["r_shoulder_lift_joint"]
        self.upper_arm_roll_joint = self.jdict["r_upper_arm_roll_joint"]
        self.elbow_flex_joint = self.jdict["r_elbow_flex_joint"]
        self.forearm_roll_joint = self.jdict["r_forearm_roll_joint"]
        self.wrist_flex_joint = self.jdict["r_wrist_flex_joint"]
        self.wrist_roll_joint = self.jdict["r_wrist_roll_joint"]

        # Random planar direction for the goal.
        self.target_pos = np.concatenate([
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1)
        ])

        # make length of vector between min and max_target_placement_radius
        self.target_pos = self.target_pos \
                          / np.linalg.norm(self.target_pos) \
                          * self.np_random.uniform(low=self.min_target_placement_radius,
                                                   high=self.max_target_placement_radius, size=1)

        self.object_pos = np.concatenate([
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1)
        ])

        # make length of vector between min and max_object_to_target_distance
        # NOTE(review): this divides by ||object_pos - target_pos|| (the offset
        # to the goal) rather than by ||object_pos||, so the resulting
        # object-to-target distance is not actually guaranteed to land in
        # [min, max] — looks like a latent bug; confirm intended behavior.
        self.object_pos = self.object_pos \
                          / np.linalg.norm(self.object_pos - self.target_pos) \
                          * self.np_random.uniform(low=self.min_object_to_target_distance,
                                                   high=self.max_object_to_target_distance, size=1)

        # set position of objects
        # zero_offset shifts sampled positions into the slide joints' frame.
        self.zero_offset = np.array([0.45, 0.55])
        self.jdict["goal_slidex"].reset_current_position(self.target_pos[0] - self.zero_offset[0], 0)
        self.jdict["goal_slidey"].reset_current_position(self.target_pos[1] - self.zero_offset[1], 0)
        self.jdict["obj_slidex"].reset_current_position(self.object_pos[0] - self.zero_offset[0], 0)
        self.jdict["obj_slidey"].reset_current_position(self.object_pos[1] - self.zero_offset[1], 0)

        # randomize all joints TODO: Will this work or do we have to constrain this resetting in some way?
        self.shoulder_pan_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.shoulder_lift_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.elbow_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.forearm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.wrist_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.wrist_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)

    def apply_action(self, a):
        """Apply one small clipped torque (gain 0.05) per arm joint."""
        assert (np.isfinite(a).all())
        self.shoulder_pan_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1)))
        self.shoulder_lift_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1)))
        self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[2], -1, +1)))
        self.elbow_flex_joint.set_motor_torque(0.05 * float(np.clip(a[3], -1, +1)))
        self.forearm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[4], -1, +1)))
        self.wrist_flex_joint.set_motor_torque(0.05 * float(np.clip(a[5], -1, +1)))
        self.wrist_roll_joint.set_motor_torque(0.05 * float(np.clip(a[6], -1, +1)))

    def calc_state(self):
        # NOTE(review): to_target_vec uses the positions sampled at reset,
        # not the bodies' current poses — confirm this is intended.
        self.to_target_vec = self.target_pos - self.object_pos
        return np.concatenate([
            np.array([j.current_position() for j in self.ordered_joints]).flatten(),  # all positions
            # NOTE(review): current_relative_position() yields scaled
            # (position, speed) pairs, not just speeds as the original
            # comment claimed.
            np.array([j.current_relative_position() for j in self.ordered_joints]).flatten(),  # all speeds
            self.to_target_vec,
            self.fingertip.pose().xyz(),
            self.object.pose().xyz(),
            self.target.pose().xyz(),
        ])
# ====== pybulletgym/envs/roboschool/robots/manipulators/reacher.py ======
from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
import numpy as np


class Reacher(MJCFBasedRobot):
    """Two-joint planar reacher: drive the fingertip onto a randomly
    placed target. 2 torque actions, 9-dimensional observation."""

    TARG_LIMIT = 0.27  # half-width of the square the target is sampled in

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'reacher.xml', 'body0', action_dim=2, obs_dim=9)

    def robot_specific_reset(self, bullet_client):
        """Scatter the target in its square, then randomize both arm joints."""
        # Keep the RNG draw order fixed: target_x, target_y, joint0, joint1.
        for slide in ("target_x", "target_y"):
            self.jdict[slide].reset_current_position(
                self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT), 0)
        self.fingertip = self.parts["fingertip"]
        self.target = self.parts["target"]
        self.central_joint = self.jdict["joint0"]
        self.elbow_joint = self.jdict["joint1"]
        for joint in (self.central_joint, self.elbow_joint):
            joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)

    def apply_action(self, a):
        """Apply a small clipped torque (gain 0.05) to each of the two joints."""
        assert (np.isfinite(a).all())
        for joint, command in ((self.central_joint, a[0]), (self.elbow_joint, a[1])):
            joint.set_motor_torque(0.05 * float(np.clip(command, -1, +1)))

    def calc_state(self):
        """Observation: target x/y, fingertip-to-target offset x/y,
        cos/sin of the shoulder angle, and the three angular speeds."""
        theta, self.theta_dot = self.central_joint.current_relative_position()
        self.gamma, self.gamma_dot = self.elbow_joint.current_relative_position()
        target_x, _ = self.jdict["target_x"].current_position()
        target_y, _ = self.jdict["target_y"].current_position()
        fingertip_xyz = np.array(self.fingertip.pose().xyz())
        target_xyz = np.array(self.target.pose().xyz())
        self.to_target_vec = fingertip_xyz - target_xyz
        return np.array([
            target_x,
            target_y,
            self.to_target_vec[0],
            self.to_target_vec[1],
            np.cos(theta),
            np.sin(theta),
            self.theta_dot,
            self.gamma,
            self.gamma_dot,
        ])

    def calc_potential(self):
        # Reward shaping: potential is the (scaled) negative distance to target.
        return -100 * np.linalg.norm(self.to_target_vec)

# ====== pybulletgym/envs/roboschool/robots/manipulators/thrower.py continues in the next chunk ======
# ====== pybulletgym/envs/roboschool/robots/manipulators/thrower.py ======
from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
import numpy as np


class Thrower(MJCFBasedRobot):
    """7-DoF arm that throws a ball at a goal (MJCF thrower model)."""
    # Sampling radii (meters) for the goal and ball positions at reset.
    min_target_placement_radius = 0.1
    max_target_placement_radius = 0.8
    min_object_placement_radius = 0.1
    max_object_placement_radius = 0.8

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'thrower.xml', 'body0', action_dim=7, obs_dim=48)

    def robot_specific_reset(self, bullet_client):
        """Cache parts/joints, randomize the arm, then place ball and goal."""
        # parts
        self.fingertip = self.parts["r_wrist_roll_link"]
        self.target = self.parts["goal"]
        self.object = self.parts["ball"]

        # joints
        self.shoulder_pan_joint = self.jdict["r_shoulder_pan_joint"]
        self.shoulder_lift_joint = self.jdict["r_shoulder_lift_joint"]
        self.upper_arm_roll_joint = self.jdict["r_upper_arm_roll_joint"]
        self.elbow_flex_joint = self.jdict["r_elbow_flex_joint"]
        self.forearm_roll_joint = self.jdict["r_forearm_roll_joint"]
        self.wrist_flex_joint = self.jdict["r_wrist_flex_joint"]
        self.wrist_roll_joint = self.jdict["r_wrist_roll_joint"]

        # Flags presumably maintained by the surrounding env once the ball
        # lands — TODO confirm against the env's step() logic.
        self._object_hit_ground = False
        self._object_hit_location = None

        # reset position and speed of manipulator
        # TODO: Will this work or do we have to constrain this resetting in some way?
        self.shoulder_pan_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.shoulder_lift_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.elbow_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.forearm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.wrist_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.wrist_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)

        # zero_offset shifts sampled positions into the model's frame.
        self.zero_offset = np.array([0.45, 0.55, 0])
        # Random 3D direction for the ball.
        self.object_pos = np.concatenate([
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1)
        ])

        # make length of vector between min and max_object_placement_radius
        self.object_pos = self.object_pos \
                          / np.linalg.norm(self.object_pos) \
                          * self.np_random.uniform(low=self.min_object_placement_radius,
                                                   high=self.max_object_placement_radius, size=1)

        # reset object position
        self.parts["ball"].reset_pose(self.object_pos - self.zero_offset, np.array([0, 0, 0, 1]))

        self.target_pos = np.concatenate([
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1)
        ])

        # make length of vector between min and max_target_placement_radius
        self.target_pos = self.target_pos \
                          / np.linalg.norm(self.target_pos) \
                          * self.np_random.uniform(low=self.min_target_placement_radius,
                                                   high=self.max_target_placement_radius, size=1)

        self.parts["goal"].reset_pose(self.target_pos - self.zero_offset, np.array([0, 0, 0, 1]))

    def apply_action(self, a):
        """Apply one small clipped torque (gain 0.05) per arm joint."""
        assert (np.isfinite(a).all())
        self.shoulder_pan_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1)))
        self.shoulder_lift_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1)))
        self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[2], -1, +1)))
        self.elbow_flex_joint.set_motor_torque(0.05 * float(np.clip(a[3], -1, +1)))
        self.forearm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[4], -1, +1)))
        self.wrist_flex_joint.set_motor_torque(0.05 * float(np.clip(a[5], -1, +1)))
        self.wrist_roll_joint.set_motor_torque(0.05 * float(np.clip(a[6], -1, +1)))

    def calc_state(self):
        # NOTE(review): to_target_vec uses reset-time sampled positions, not
        # the ball's current pose (which is appended separately below) —
        # confirm intended.
        self.to_target_vec = self.target_pos - self.object_pos
        return np.concatenate([
            np.array([j.current_position() for j in self.ordered_joints]).flatten(),  # all positions
            # NOTE(review): current_relative_position() yields scaled
            # (position, speed) pairs, not only speeds.
            np.array([j.current_relative_position() for j in self.ordered_joints]).flatten(),  # all speeds
            self.to_target_vec,
            self.fingertip.pose().xyz(),
            self.object.pose().xyz(),
            self.target.pose().xyz(),
        ])

# ====== pybulletgym/envs/roboschool/robots/pendula/interted_pendulum.py continues in the next chunk ======
# ====== pybulletgym/envs/roboschool/robots/pendula/interted_pendulum.py ======
class InvertedPendulum(MJCFBasedRobot):
    """Cart-pole balancing robot; the swingup subclass starts the pole down."""
    swingup = False

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'inverted_pendulum.xml', 'cart', action_dim=1, obs_dim=5)

    def robot_specific_reset(self, bullet_client):
        """Re-seat the pole at a small random angle (offset by pi for swingup)."""
        self._p = bullet_client
        self.pole = self.parts["pole"]
        self.slider = self.jdict["slider"]
        self.j1 = self.jdict["hinge"]
        u = self.np_random.uniform(low=-.1, high=.1)
        self.j1.reset_current_position(u if not self.swingup else 3.1415 + u, 0)
        self.j1.set_motor_torque(0)

    def apply_action(self, a):
        """Drive the cart with a clipped force (gain 100).

        BUGFIX: the original asserted finiteness of `a` *before* its graceful
        fallback, which made the fallback unreachable (and under `python -O`
        the assert vanished entirely). Keep only the intended graceful path:
        warn and zero the action instead of crashing.
        """
        if not np.isfinite(a).all():
            print("a is inf")
            a[0] = 0
        self.slider.set_motor_torque(100 * float(np.clip(a[0], -1, +1)))

    def calc_state(self):
        """Observation: cart x/vx, cos/sin of pole angle, pole angular speed.

        Same dead-assert cleanup as apply_action: non-finite readings from the
        engine are zeroed (with a warning) rather than asserted away.
        """
        self.theta, theta_dot = self.j1.current_position()
        x, vx = self.slider.current_position()

        if not np.isfinite(x):
            print("x is inf")
            x = 0

        if not np.isfinite(vx):
            print("vx is inf")
            vx = 0

        if not np.isfinite(self.theta):
            print("theta is inf")
            self.theta = 0

        if not np.isfinite(theta_dot):
            print("theta_dot is inf")
            theta_dot = 0

        return np.array([
            x, vx,
            np.cos(self.theta), np.sin(self.theta), theta_dot
        ])


class InvertedPendulumSwingup(InvertedPendulum):
    # Same robot, but robot_specific_reset starts the pole hanging down.
    swingup = True

# ====== pybulletgym/envs/roboschool/robots/pendula/inverted_double_pendulum.py continues in the next chunk ======
# ====== pybulletgym/envs/roboschool/robots/pendula/inverted_double_pendulum.py ======
class InvertedDoublePendulum(MJCFBasedRobot):
    """Cart with a two-link pole; balance both links upright."""

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'inverted_double_pendulum.xml', 'cart', action_dim=1, obs_dim=9)

    def robot_specific_reset(self, bullet_client):
        """Give both hinges a small random angle and zero their torques."""
        self._p = bullet_client
        self.pole2 = self.parts["pole2"]
        self.slider = self.jdict["slider"]
        self.j1 = self.jdict["hinge"]
        self.j2 = self.jdict["hinge2"]
        u = self.np_random.uniform(low=-.1, high=.1, size=[2])
        self.j1.reset_current_position(float(u[0]), 0)
        self.j2.reset_current_position(float(u[1]), 0)
        self.j1.set_motor_torque(0)
        self.j2.set_motor_torque(0)

    def apply_action(self, a):
        """Drive the cart with a clipped force (gain 200)."""
        assert(np.isfinite(a).all())
        self.slider.set_motor_torque(200*float(np.clip(a[0], -1, +1)))

    def calc_state(self):
        """Observation: cart x/vx, tip x, cos/sin + speed for both hinges."""
        theta, theta_dot = self.j1.current_position()
        gamma, gamma_dot = self.j2.current_position()
        x, vx = self.slider.current_position()
        # Tip position of the upper pole; y is unused in this planar task.
        self.pos_x, _, self.pos_y = self.pole2.pose().xyz()
        assert(np.isfinite(x))
        return np.array([
            x, vx,
            self.pos_x,
            np.cos(theta), np.sin(theta), theta_dot,
            np.cos(gamma), np.sin(gamma), gamma_dot,
        ])


# ====== pybulletgym/envs/roboschool/scenes/__init__.py ======
from pybulletgym.envs.roboschool.scenes.stadium import StadiumScene

# ====== pybulletgym/envs/roboschool/scenes/scene_bases.py continues in the next chunk ======
# ====== pybulletgym/envs/roboschool/scenes/scene_bases.py ======
import sys, os
sys.path.append(os.path.dirname(__file__))
import pybullet as p

import gym


class Scene:
    "A base class for single- and multiplayer scenes"

    def __init__(self, bullet_client, gravity, timestep, frame_skip):
        self._p = bullet_client
        self.np_random, seed = gym.utils.seeding.np_random(None)
        self.timestep = timestep
        self.frame_skip = frame_skip

        # dt is the effective control interval: one global_step advances
        # the simulation by timestep * frame_skip seconds.
        self.dt = self.timestep * self.frame_skip
        self.cpp_world = World(self._p, gravity, timestep, frame_skip)

        self.test_window_still_open = True  # or never opened
        self.human_render_detected = False  # if user wants render("human"), we open test window

        self.multiplayer_robots = {}

    def test_window(self):
        "Call this function every frame, to see what's going on. Not necessary in learning."
        self.human_render_detected = True
        return self.test_window_still_open

    def actor_introduce(self, robot):
        "Usually after scene reset"
        # `multiplayer` is a class attribute supplied by subclasses.
        if not self.multiplayer: return
        self.multiplayer_robots[robot.player_n] = robot

    def actor_is_active(self, robot):
        """
        Used by robots to see if they are free to exclusiveley put their HUD on the test window.
        Later can be used for click-focus robots.
        """
        return not self.multiplayer

    def episode_restart(self, bullet_client):
        "This function gets overridden by specific scene, to reset specific objects into their start positions"
        self.cpp_world.clean_everything()
        #self.cpp_world.test_window_history_reset()

    def global_step(self):
        """
        The idea is: apply motor torques for all robots, then call global_step(), then collect
        observations from robots using step() with the same action.
        """
        self.cpp_world.step(self.frame_skip)


class SingleRobotEmptyScene(Scene):
    multiplayer = False  # this class is used "as is" for InvertedPendulum, Reacher


class World:
    """Thin wrapper around the bullet client's global physics settings."""

    def __init__(self, bullet_client, gravity, timestep, frame_skip):
        self._p = bullet_client
        self.gravity = gravity
        self.timestep = timestep
        self.frame_skip = frame_skip
        self.numSolverIterations = 5
        self.clean_everything()

    def clean_everything(self):
        """(Re)apply gravity, contact ERP and stepping parameters."""
        #p.resetSimulation()
        self._p.setGravity(0, 0, -self.gravity)
        self._p.setDefaultContactERP(0.9)
        #print("self.numSolverIterations=",self.numSolverIterations)
        self._p.setPhysicsEngineParameter(fixedTimeStep=self.timestep*self.frame_skip, numSolverIterations=self.numSolverIterations, numSubSteps=self.frame_skip)

    def step(self, frame_skip):
        # A single stepSimulation() suffices: frame skipping is delegated to
        # the engine via numSubSteps in clean_everything().
        self._p.stepSimulation()

# ====== pybulletgym/envs/roboschool/scenes/stadium.py continues in the next chunk ======
# ====== pybulletgym/envs/roboschool/scenes/stadium.py ======
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
os.sys.path.insert(0,parentdir)

from .scene_bases import Scene
import pybullet


class StadiumScene(Scene):
    """Flat stadium ground plane shared by the locomotion environments."""
    multiplayer = False
    zero_at_running_strip_start_line = True  # if False, center of coordinates (0,0,0) will be at the middle of the stadium
    stadium_halflen = 105*0.25   # FOOBALL_FIELD_HALFLEN
    stadium_halfwidth = 50*0.25  # FOOBALL_FIELD_HALFWID
    stadiumLoaded = 0

    def episode_restart(self, bullet_client):
        """Load the ground-plane SDF once and tune its contact/visual props."""
        self._p = bullet_client
        Scene.episode_restart(self, bullet_client)
        # Load lazily on first restart; the class-level flag makes the load
        # happen at most once per process.
        if self.stadiumLoaded == 0:
            self.stadiumLoaded = 1

            # stadium_pose = cpp_household.Pose()
            # if self.zero_at_running_strip_start_line:
            #    stadium_pose.set_xyz(27, 21, 0)  # see RUN_STARTLINE, RUN_RAD constants

            filename = os.path.join(os.path.dirname(__file__), "..", "..", "assets", "scenes", "stadium", "plane_stadium.sdf")
            self.ground_plane_mjcf=self._p.loadSDF(filename)
            #filename = os.path.join(pybullet_data.getDataPath(),"stadium_no_collision.sdf")
            #self.ground_plane_mjcf = p.loadSDF(filename)
            #
            for i in self.ground_plane_mjcf:
                self._p.changeDynamics(i,-1,lateralFriction=0.8, restitution=0.5)
                self._p.changeVisualShape(i,-1,rgbaColor=[1,1,1,0.8])
                self._p.configureDebugVisualizer(pybullet.COV_ENABLE_PLANAR_REFLECTION,1)

            # for j in range(pybullet.getNumJoints(i)):
            #	self._p.changeDynamics(i,j,lateralFriction=0)
            #despite the name (stadium_no_collision), it DID have collision, so don't add duplicate ground

# ====== pybulletgym/examples/... : remaining entries in this chunk are raw-URL
# placeholders for binary __init__.py blobs in the original dump — no code. ======
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "ppo-mujoco-invdpendulumv2-58222240" 2 | all_model_checkpoint_paths: "ppo-mujoco-invdpendulumv2-58011633" 3 | all_model_checkpoint_paths: "ppo-mujoco-invdpendulumv2-58059089" 4 | all_model_checkpoint_paths: "ppo-mujoco-invdpendulumv2-58112398" 5 | all_model_checkpoint_paths: "ppo-mujoco-invdpendulumv2-58160617" 6 | all_model_checkpoint_paths: "ppo-mujoco-invdpendulumv2-58222240" 7 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.data-00000-of-00001 -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.index -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.meta -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.data-00000-of-00001 -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.index -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.meta -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.data-00000-of-00001: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.data-00000-of-00001 -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.index -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.meta -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.data-00000-of-00001 -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.index: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.index -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.meta -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.data-00000-of-00001 -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.index -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.meta -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "ppo-mujoco-invpendulumv2-12789838" 2 | all_model_checkpoint_paths: "ppo-mujoco-invpendulumv2-12475216" 3 | all_model_checkpoint_paths: "ppo-mujoco-invpendulumv2-12555996" 4 | all_model_checkpoint_paths: "ppo-mujoco-invpendulumv2-12635251" 5 | all_model_checkpoint_paths: "ppo-mujoco-invpendulumv2-12706882" 6 | all_model_checkpoint_paths: "ppo-mujoco-invpendulumv2-12789838" 7 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.data-00000-of-00001 -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.index -------------------------------------------------------------------------------- 
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.meta -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.data-00000-of-00001 -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.index -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.meta -------------------------------------------------------------------------------- 
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.data-00000-of-00001 -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.index -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.meta -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.data-00000-of-00001 -------------------------------------------------------------------------------- 
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.index -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.meta -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.data-00000-of-00001 -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.index -------------------------------------------------------------------------------- 
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.meta -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/cnn_dqn2013_network.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "conv2d", 4 | "size": 16, 5 | "window": 8, 6 | "stride": 4, 7 | "padding": "VALID" 8 | }, 9 | { 10 | "type": "conv2d", 11 | "size": 32, 12 | "window": 4, 13 | "stride": 2, 14 | "padding": "VALID" 15 | }, 16 | { 17 | "type": "flatten" 18 | }, 19 | { 20 | "type": "dense", 21 | "size": 256, 22 | "activation": "relu" 23 | } 24 | ] 25 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/cnn_dqn_network.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "conv2d", 4 | "size": 32, 5 | "window": 8, 6 | "stride": 4 7 | }, 8 | { 9 | "type": "conv2d", 10 | "size": 64, 11 | "window": 4, 12 | "stride": 2 13 | }, 14 | { 15 | "type": "conv2d", 16 | "size": 64, 17 | "window": 3, 18 | "stride": 1 19 | }, 20 | { 21 | "type": "flatten" 22 | }, 23 | { 24 | "type": "dense", 25 | "size": 512 26 | } 27 | ] 28 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/cnn_network_2048.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "conv2d", 4 | "size": 64, 5 | "window": 2, 6 | "stride": 1, 7 | "padding": "VALID" 8 | }, 9 | { 10 | "type": "conv2d", 11 | "size": 64, 12 | "window": 2, 13 | "stride": 1, 14 
| "padding": "VALID" 15 | }, 16 | { 17 | "type": "flatten" 18 | }, 19 | { 20 | "type": "dense", 21 | "size": 64, 22 | "activation": "relu" 23 | } 24 | ] 25 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/ddpg.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "ddpg_agent", 3 | 4 | "update_mode": { 5 | "unit": "timesteps", 6 | "batch_size": 64, 7 | "frequency": 64 8 | }, 9 | "memory": { 10 | "type": "replay", 11 | "capacity": 100000, 12 | "include_next_states": true 13 | }, 14 | 15 | "optimizer": { 16 | "type": "adam", 17 | "learning_rate": 1e-4 18 | }, 19 | 20 | "discount": 0.99, 21 | "entropy_regularization": null, 22 | 23 | "critic_network": { 24 | "size_t0": 64, 25 | "size_t1": 64 26 | }, 27 | "critic_optimizer": { 28 | "type": "adam", 29 | "learning_rate": 1e-3 30 | }, 31 | "target_sync_frequency": 1, 32 | "target_update_weight": 0.999, 33 | 34 | "actions_exploration": { 35 | "type": "ornstein_uhlenbeck", 36 | "sigma": 0.3, 37 | "mu": 0.0, 38 | "theta": 0.15 39 | }, 40 | 41 | "saver": { 42 | "directory": null, 43 | "seconds": 600 44 | }, 45 | "summarizer": { 46 | "directory": null, 47 | "labels": [], 48 | "seconds": 120 49 | }, 50 | "execution": { 51 | "type": "single", 52 | "session_config": null, 53 | "distributed_spec": null 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/dqfd.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "dqfd_agent", 3 | 4 | "update_mode": { 5 | "unit": "timesteps", 6 | "batch_size": 64, 7 | "frequency": 4 8 | }, 9 | "memory": { 10 | "type": "replay", 11 | "capacity": 10000, 12 | "include_next_states": true 13 | }, 14 | 15 | "optimizer": { 16 | "type": "adam", 17 | "learning_rate": 1e-3 18 | }, 19 | 20 | "discount": 0.99, 21 | "entropy_regularization": null, 22 
| 23 | "target_sync_frequency": 1000, 24 | "target_update_weight": 1.0, 25 | 26 | "actions_exploration": { 27 | "type": "epsilon_decay", 28 | "initial_epsilon": 0.5, 29 | "final_epsilon": 0.0, 30 | "timesteps": 10000 31 | }, 32 | 33 | "saver": { 34 | "directory": null, 35 | "seconds": 600 36 | }, 37 | "summarizer": { 38 | "directory": null, 39 | "labels": [], 40 | "seconds": 120 41 | }, 42 | "execution": { 43 | "type": "single", 44 | "session_config": null, 45 | "distributed_spec": null 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/dqn.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "dqn_agent", 3 | 4 | "update_mode": { 5 | "unit": "timesteps", 6 | "batch_size": 64, 7 | "frequency": 4 8 | }, 9 | "memory": { 10 | "type": "replay", 11 | "capacity": 10000, 12 | "include_next_states": true 13 | }, 14 | 15 | "optimizer": { 16 | "type": "clipped_step", 17 | "clipping_value": 0.1, 18 | "optimizer": { 19 | "type": "adam", 20 | "learning_rate": 1e-3 21 | } 22 | }, 23 | 24 | "discount": 0.99, 25 | "entropy_regularization": null, 26 | "double_q_model": true, 27 | 28 | "target_sync_frequency": 1000, 29 | "target_update_weight": 1.0, 30 | 31 | "actions_exploration": { 32 | "type": "epsilon_anneal", 33 | "initial_epsilon": 0.5, 34 | "final_epsilon": 0.0, 35 | "timesteps": 10000 36 | }, 37 | 38 | "saver": { 39 | "directory": null, 40 | "seconds": 600 41 | }, 42 | "summarizer": { 43 | "directory": null, 44 | "labels": [], 45 | "seconds": 120 46 | }, 47 | "execution": { 48 | "type": "single", 49 | "session_config": null, 50 | "distributed_spec": null 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/dqn_ue4.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "dqn_agent", 3 | 4 | 
"update_mode": { 5 | "unit": "timesteps", 6 | "batch_size": 64, 7 | "frequency": 4 8 | }, 9 | 10 | "memory": { 11 | "type": "replay", 12 | "capacity": 10000, 13 | "include_next_states": true 14 | }, 15 | 16 | "optimizer": { 17 | "type": "adam", 18 | "learning_rate": 1e-3 19 | }, 20 | 21 | "discount": 0.97, 22 | 23 | "states_preprocessing": [ 24 | { 25 | "type": "divide", 26 | "scale": 255 27 | }, 28 | { 29 | "type": "sequence", 30 | "length": 4, 31 | "add_rank": true 32 | } 33 | ], 34 | 35 | "actions_exploration": { 36 | "type": "epsilon_decay", 37 | "initial_epsilon": 1.0, 38 | "final_epsilon": 0.1, 39 | "timesteps": 100000 40 | }, 41 | 42 | "saver": { 43 | "directory": null, 44 | "seconds": 600 45 | }, 46 | 47 | "summarizer": { 48 | "directory": null, 49 | "labels": [], 50 | "seconds": 120 51 | }, 52 | "execution": { 53 | "type": "single", 54 | "session_config": null, 55 | "distributed_spec": null 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/dqn_visual.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "dqn_agent", 3 | 4 | "update_mode": { 5 | "unit": "timesteps", 6 | "batch_size": 64, 7 | "frequency": 4 8 | }, 9 | 10 | "memory": { 11 | "type": "replay", 12 | "capacity": 10000, 13 | "include_next_states": true 14 | }, 15 | 16 | "optimizer": { 17 | "type": "adam", 18 | "learning_rate": 1e-3 19 | }, 20 | 21 | "discount": 0.97, 22 | 23 | "states_preprocessing": [ 24 | { 25 | "type": "image_resize", 26 | "width": 84, 27 | "height": 84 28 | }, 29 | { 30 | "type": "grayscale" 31 | }, 32 | { 33 | "type": "divide", 34 | "scale": 255 35 | } 36 | ], 37 | 38 | "actions_exploration": { 39 | "type": "epsilon_decay", 40 | "initial_epsilon": 1.0, 41 | "final_epsilon": 0.1, 42 | "timesteps": 100000 43 | }, 44 | 45 | "saver": { 46 | "directory": null, 47 | "seconds": 600 48 | }, 49 | 50 | "summarizer": { 51 | "directory": null, 52 
| "labels": [], 53 | "seconds": 120 54 | }, 55 | "execution": { 56 | "type": "single", 57 | "session_config": null, 58 | "distributed_spec": null 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/mlp2_embedding_network.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "embedding", 4 | "indices": 100, 5 | "size": 32 6 | }, 7 | { 8 | "type": "dense", 9 | "size": 32 10 | }, 11 | { 12 | "type": "dense", 13 | "size": 32 14 | } 15 | ] 16 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/mlp2_lstm_network.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "dense", 4 | "size": 32 5 | }, 6 | { 7 | "type": "dense", 8 | "size": 32 9 | }, 10 | { 11 | "type": "internal_lstm", 12 | "size": 32 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/mlp2_network.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "dense", 4 | "size": 32, 5 | "activation": "relu" 6 | }, 7 | { 8 | "type": "dense", 9 | "size": 32, 10 | "activation": "relu" 11 | } 12 | ] 13 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/mlp2_normalized_network.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "linear", 4 | "size": 64 5 | }, 6 | { 7 | "type": "tf_layer", 8 | "layer": "batch_normalization" 9 | }, 10 | { 11 | "type": "nonlinearity", 12 | "name": "relu" 13 | }, 14 | 15 | 16 | { 17 | "type": "linear", 18 | "size": 64 19 | }, 20 | { 21 | "type": "tf_layer", 22 | "layer": "batch_normalization" 23 | }, 24 | { 25 | "type": "nonlinearity", 26 | 
"name": "relu" 27 | }, 28 | 29 | { 30 | "type": "dense", 31 | "size": 64, 32 | "activation": null 33 | } 34 | ] 35 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/naf.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "naf_agent", 3 | 4 | "update_mode": { 5 | "unit": "timesteps", 6 | "batch_size": 64, 7 | "frequency": 4 8 | }, 9 | "memory": { 10 | "type": "replay", 11 | "capacity": 10000, 12 | "include_next_states": true 13 | }, 14 | 15 | "optimizer": { 16 | "type": "adam", 17 | "learning_rate": 1e-3 18 | }, 19 | 20 | "discount": 0.99, 21 | "entropy_regularization": null, 22 | "double_q_model": true, 23 | 24 | "target_sync_frequency": 1000, 25 | "target_update_weight": 1.0, 26 | 27 | "actions_exploration": { 28 | "type": "ornstein_uhlenbeck", 29 | "sigma": 0.2, 30 | "mu": 0.0, 31 | "theta": 0.15 32 | }, 33 | 34 | "saver": { 35 | "directory": null, 36 | "seconds": 600 37 | }, 38 | "summarizer": { 39 | "directory": null, 40 | "labels": [], 41 | "seconds": 120 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/ppo.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "ppo_agent", 3 | 4 | "update_mode": { 5 | "unit": "episodes", 6 | "batch_size": 10, 7 | "frequency": 10 8 | }, 9 | "memory": { 10 | "type": "latest", 11 | "include_next_states": false, 12 | "capacity": 5000 13 | }, 14 | 15 | "step_optimizer": { 16 | "type": "adam", 17 | "learning_rate": 1e-3 18 | }, 19 | "subsampling_fraction": 0.1, 20 | "optimization_steps": 50, 21 | 22 | "discount": 0.99, 23 | "entropy_regularization": 0.01, 24 | "gae_lambda": null, 25 | "likelihood_ratio_clipping": 0.2, 26 | 27 | "baseline_mode": "states", 28 | "baseline": { 29 | "type": "mlp", 30 | "sizes": [32, 32] 31 | }, 32 | "baseline_optimizer": { 33 | "type": 
"multi_step", 34 | "optimizer": { 35 | "type": "adam", 36 | "learning_rate": 1e-3 37 | }, 38 | "num_steps": 5 39 | }, 40 | 41 | "saver": { 42 | "directory": null, 43 | "seconds": 600 44 | }, 45 | "summarizer": { 46 | "directory": null, 47 | "labels": [], 48 | "seconds": 120 49 | }, 50 | "execution": { 51 | "type": "single", 52 | "session_config": null, 53 | "distributed_spec": null 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/ppo_cnn.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "ppo_agent", 3 | 4 | "update_mode": { 5 | "unit": "episodes", 6 | "batch_size": 10, 7 | "frequency": 10 8 | }, 9 | "memory": { 10 | "type": "latest", 11 | "include_next_states": false, 12 | "capacity": 5000 13 | }, 14 | 15 | "step_optimizer": { 16 | "type": "adam", 17 | "learning_rate": 1e-3 18 | }, 19 | "subsampling_fraction": 0.1, 20 | "optimization_steps": 50, 21 | 22 | "discount": 0.99, 23 | "entropy_regularization": 0.01, 24 | "gae_lambda": null, 25 | "likelihood_ratio_clipping": 0.2, 26 | 27 | "baseline_mode": "states", 28 | "baseline": { 29 | "type": "cnn", 30 | "conv_sizes": [32, 32], 31 | "dense_sizes": [32] 32 | }, 33 | "baseline_optimizer": { 34 | "type": "multi_step", 35 | "optimizer": { 36 | "type": "adam", 37 | "learning_rate": 1e-3 38 | }, 39 | "num_steps": 5 40 | }, 41 | 42 | "saver": { 43 | "directory": null, 44 | "seconds": 600 45 | }, 46 | "summarizer": { 47 | "directory": null, 48 | "labels": [], 49 | "seconds": 120 50 | }, 51 | "execution": { 52 | "type": "single", 53 | "session_config": null, 54 | "distributed_spec": null 55 | } 56 | } -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/trpo.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "trpo_agent", 3 | 4 | "update_mode": { 5 | "unit": 
"episodes", 6 | "batch_size": 20, 7 | "frequency": 20 8 | }, 9 | "memory": { 10 | "type": "latest", 11 | "include_next_states": false, 12 | "capacity": 5000 13 | }, 14 | 15 | "learning_rate": 1e-2, 16 | 17 | "discount": 0.99, 18 | "entropy_regularization": null, 19 | "gae_lambda": null, 20 | "likelihood_ratio_clipping": null, 21 | 22 | "baseline_mode": null, 23 | "baseline": null, 24 | "baseline_optimizer": null, 25 | 26 | "saver": { 27 | "directory": null, 28 | "seconds": 600 29 | }, 30 | "summarizer": { 31 | "directory": null, 32 | "labels": [], 33 | "seconds": 120 34 | }, 35 | "execution": { 36 | "type": "single", 37 | "session_config": null, 38 | "distributed_spec": null 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /pybulletgym/examples/tensorforce/configs/vpg.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "vpg_agent", 3 | 4 | "update_mode": { 5 | "unit": "episodes", 6 | "batch_size": 20, 7 | "frequency": 20 8 | }, 9 | "memory": { 10 | "type": "latest", 11 | "include_next_states": false, 12 | "capacity": 5000 13 | }, 14 | 15 | "optimizer": { 16 | "type": "adam", 17 | "learning_rate": 2e-2 18 | }, 19 | 20 | "discount": 0.99, 21 | "entropy_regularization": null, 22 | "gae_lambda": null, 23 | 24 | "baseline_mode": "states", 25 | "baseline": { 26 | "type": "mlp", 27 | "sizes": [32, 32] 28 | }, 29 | "baseline_optimizer": { 30 | "type": "multi_step", 31 | "optimizer": { 32 | "type": "adam", 33 | "learning_rate": 1e-3 34 | }, 35 | "num_steps": 5 36 | }, 37 | 38 | "saver": { 39 | "directory": null, 40 | "seconds": 600 41 | }, 42 | "summarizer": { 43 | "directory": null, 44 | "labels": [], 45 | "seconds": 120 46 | }, 47 | "execution": { 48 | "type": "single", 49 | "session_config": null, 50 | "distributed_spec": null 51 | } 52 | } 53 | -------------------------------------------------------------------------------- 
/pybulletgym/examples/tensorforce/configs/vpg_baseline_visual.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "vpg_agent", 3 | "batch_size": 4000, 4 | "optimizer": { 5 | "type": "adam", 6 | "learning_rate": 1e-2 7 | }, 8 | 9 | "discount": 0.99, 10 | "entropy_regularization": null, 11 | "gae_lambda": 0.97, 12 | 13 | "baseline_mode": "states", 14 | "baseline": { 15 | "type": "mlp", 16 | "sizes": [32, 32] 17 | }, 18 | "baseline_optimizer": { 19 | "type": "multi_step", 20 | "optimizer": { 21 | "type": "adam", 22 | "learning_rate": 1e-3 23 | }, 24 | "num_steps": 5 25 | }, 26 | 27 | "states_preprocessing": [ 28 | { 29 | "type": "image_resize", 30 | "width": 84, 31 | "height": 84 32 | }, 33 | { 34 | "type": "grayscale" 35 | }, 36 | { 37 | "type": "center" 38 | }, 39 | { 40 | "type": "sequence", 41 | "length": 4 42 | } 43 | ], 44 | "execution": { 45 | "type": "single", 46 | "session_config": null, 47 | "distributed_spec": null 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /pybulletgym/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/tests/__init__.py -------------------------------------------------------------------------------- /pybulletgym/tests/roboschool/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/tests/roboschool/__init__.py -------------------------------------------------------------------------------- /pybulletgym/tests/roboschool/agents/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/tests/roboschool/agents/__init__.py -------------------------------------------------------------------------------- /pybulletgym/tests/roboschool/agents/policies.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def relu(x): 5 | return np.maximum(x, 0) 6 | 7 | 8 | class SmallReactivePolicy: 9 | """Simple multi-layer perceptron policy, no internal state""" 10 | def __init__(self, observation_space, action_space, weights, biases): 11 | self.weights = weights 12 | self.biases = biases 13 | 14 | def act(self, ob): 15 | x = ob 16 | x = relu(np.dot(x, self.weights[0]) + self.biases[0]) 17 | x = relu(np.dot(x, self.weights[1]) + self.biases[1]) 18 | x = np.dot(x, self.weights[2]) + self.biases[2] 19 | return x 20 | -------------------------------------------------------------------------------- /pybulletgym/tests/test_pybulletgym_mujoco_sanity.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import traceback 4 | import pybulletgym # required for the Bullet envs to be initialized 5 | 6 | envs = [spec.id for spec in gym.envs.registry.all() if spec.id.find('MuJoCo') >= 0] 7 | bugged_envs = [] 8 | for env_name in envs: 9 | try: 10 | print('[TESTING] ENV', env_name, '...') 11 | env = gym.make(env_name) 12 | env.reset() 13 | env.step(np.random.random(env.action_space.shape)) 14 | print('[SUCCESS] ENV', env_name, '\n') 15 | except Exception as e: 16 | print(env_name, ': ', traceback.format_exc()) 17 | bugged_envs.append(env_name) 18 | print('[FAIL] ENV', env_name, '\n') 19 | 20 | print('The following envs have problems:', bugged_envs) 21 | -------------------------------------------------------------------------------- /pybulletgym/tests/test_pybylletgym_roboschool_sanity.py: 
-------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import traceback 4 | import pybulletgym # required for the Bullet envs to be initialized 5 | 6 | envs = [spec.id for spec in gym.envs.registry.all() if spec.id.find('Bullet') >= 0] 7 | bugged_envs = [] 8 | for env_name in envs: 9 | try: 10 | print('[TESTING] ENV', env_name, '...') 11 | env = gym.make(env_name) 12 | env.reset() 13 | env.step(np.random.random(env.action_space.shape)) 14 | print('[SUCCESS] ENV', env_name, '\n') 15 | except Exception as e: 16 | print(env_name, ': ', traceback.format_exc()) 17 | bugged_envs.append(env_name) 18 | print('[FAIL] ENV', env_name, '\n') 19 | 20 | print('The following envs have problems:', bugged_envs) 21 | -------------------------------------------------------------------------------- /pybulletgym/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/utils/__init__.py -------------------------------------------------------------------------------- /pybulletgym/utils/kerasrl_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import re 3 | from gym import error 4 | import glob 5 | # checkpoints/KerasDDPG-InvertedPendulum-v0-20170701190920_actor.h5 6 | weight_save_re = re.compile(r'^(?:\w+\/)+?(\w+-v\d+)-(\w+-v\d+)-(\d+)(?:_\w+)?\.(\w+)$') 7 | 8 | def get_fields(weight_save_name): 9 | match = weight_save_re.search(weight_save_name) 10 | if not match: 11 | raise error.Error('Attempted to read a malformed weight save: {}. 
(Currently all weight saves must be of the form {}.)'.format(id,weight_save_re.pattern)) 12 | return match.group(1), match.group(2), int(match.group(3)) 13 | 14 | def get_latest_save(file_folder, agent_name, env_name, version_number): 15 | """ 16 | Returns the properties of the latest weight save. The information can be used to generate the loading path 17 | :return: 18 | """ 19 | path = "%s%s"% (file_folder, "*.h5") 20 | file_list = glob.glob(path) 21 | latest_file_properties = [] 22 | file_properties = [] 23 | for f in file_list: 24 | file_properties = get_fields(f) 25 | if file_properties[0] == agent_name and file_properties[1] == env_name and (latest_file_properties == [] or file_properties[2] > latest_file_properties[2]): 26 | latest_file_properties = file_properties 27 | 28 | return latest_file_properties 29 | -------------------------------------------------------------------------------- /pybulletgym/utils/robot_dev_util.py: -------------------------------------------------------------------------------- 1 | from pybulletgym.envs.roboschool.robots.locomotors import Ant 2 | import gym.utils.seeding 3 | import numpy as np 4 | import pybullet as p 5 | import time 6 | from importlib import import_module 7 | import sys 8 | 9 | 10 | if __name__ == "__main__": 11 | 12 | physicsClient = p.connect(p.GUI) 13 | 14 | path, class_str = sys.argv[1].rsplit('.', 1) 15 | module = import_module(path) 16 | robot_class = getattr(module, class_str) 17 | 18 | robot = robot_class() 19 | np_random, seed = gym.utils.seeding.np_random() 20 | robot.np_random = np_random 21 | robot.reset(p) 22 | 23 | while True: 24 | try: 25 | robot.apply_action(np.random.uniform(robot.action_space.low, robot.action_space.high, robot.action_space.shape)) 26 | p.stepSimulation() 27 | time.sleep(1./240.) 
28 | except KeyboardInterrupt: 29 | p.disconnect() 30 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import sys, os.path 3 | 4 | # Don't import gym module here, since deps may not be installed 5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'pybulletgym')) 6 | 7 | VERSION = 0.1 8 | 9 | setup_py_dir = os.path.dirname(os.path.realpath(__file__)) 10 | 11 | need_files = [] 12 | datadir = "pybulletgym/envs/assets" 13 | 14 | hh = setup_py_dir + "/" + datadir 15 | 16 | for root, dirs, files in os.walk(hh): 17 | for fn in files: 18 | ext = os.path.splitext(fn)[1][1:] 19 | if ext and ext in 'png gif jpg urdf sdf obj mtl dae off stl STL xml '.split(): 20 | fn = root + "/" + fn 21 | need_files.append(fn[1+len(hh):]) 22 | 23 | setup(name='pybulletgym', 24 | version=VERSION, 25 | description='PyBullet Gym is an open-source implementation of the OpenAI Gym MuJoCo environments for use with the OpenAI Gym Reinforcement Learning Research Platform in support of open research.', 26 | url='https://github.com/benelot/pybullet-gym', 27 | author='Benjamin Ellenberger', 28 | author_email='be.ellenberger@gmail.com', 29 | license='', 30 | packages=[package for package in find_packages() 31 | if package.startswith('pybulletgym')], 32 | zip_safe=False, 33 | install_requires=[ 34 | 'pybullet>=1.7.8', 35 | ], 36 | package_data={'pybulletgym': need_files}, 37 | ) 38 | --------------------------------------------------------------------------------