├── .gitignore
├── LICENSE.md
├── README.md
├── __init__.py
├── pybulletgym
├── __init__.py
├── agents
│ ├── __init__.py
│ ├── agent_register.py
│ ├── agents_baselines.py
│ └── agents_kerasrl.py
├── envs
│ ├── __init__.py
│ ├── assets
│ │ ├── __init__.py
│ │ ├── mjcf
│ │ │ ├── ant.xml
│ │ │ ├── capsule.xml
│ │ │ ├── capsule_fromtoX.xml
│ │ │ ├── capsule_fromtoY.xml
│ │ │ ├── capsule_fromtoZ.xml
│ │ │ ├── cylinder.xml
│ │ │ ├── cylinder_fromtoX.xml
│ │ │ ├── cylinder_fromtoY.xml
│ │ │ ├── cylinder_fromtoZ.xml
│ │ │ ├── ground.xml
│ │ │ ├── ground_plane.xml
│ │ │ ├── half_cheetah.xml
│ │ │ ├── hello_mjcf.xml
│ │ │ ├── hopper.xml
│ │ │ ├── humanoid.xml
│ │ │ ├── humanoid_fixed.xml
│ │ │ ├── humanoid_symmetric.xml
│ │ │ ├── humanoid_symmetric_no_ground.xml
│ │ │ ├── inverted_double_pendulum.xml
│ │ │ ├── inverted_pendulum.xml
│ │ │ ├── pusher.xml
│ │ │ ├── reacher.xml
│ │ │ ├── striker.xml
│ │ │ ├── swimmer.xml
│ │ │ ├── thrower.xml
│ │ │ └── walker2d.xml
│ │ ├── robots
│ │ │ └── atlas
│ │ │ │ └── atlas_description
│ │ │ │ ├── atlas_v4_with_multisense.urdf
│ │ │ │ ├── materials
│ │ │ │ └── textures
│ │ │ │ │ ├── drc_extremities_diffuse.jpg
│ │ │ │ │ ├── extremities_diffuse_unplugged.jpg
│ │ │ │ │ ├── right_leg_diffuse_unplugged.jpg
│ │ │ │ │ └── torso_diffuse_unplugged.jpg
│ │ │ │ ├── meshes_unplugged
│ │ │ │ ├── l_foot.dae
│ │ │ │ ├── l_lglut.dae
│ │ │ │ ├── l_lleg.dae
│ │ │ │ ├── l_talus.dae
│ │ │ │ ├── l_uglut.dae
│ │ │ │ ├── l_uleg.dae
│ │ │ │ ├── ltorso.dae
│ │ │ │ ├── mtorso.dae
│ │ │ │ ├── pelvis.dae
│ │ │ │ ├── r_clav.dae
│ │ │ │ ├── r_foot.dae
│ │ │ │ ├── r_lglut.dae
│ │ │ │ ├── r_lleg.dae
│ │ │ │ ├── r_scap.dae
│ │ │ │ ├── r_talus.dae
│ │ │ │ ├── r_uarm.dae
│ │ │ │ ├── r_uglut.dae
│ │ │ │ ├── r_uleg.dae
│ │ │ │ └── utorso.dae
│ │ │ │ ├── meshes_v3
│ │ │ │ ├── r_farm.dae
│ │ │ │ ├── r_hand.dae
│ │ │ │ └── r_larm.dae
│ │ │ │ └── multisense_sl_description
│ │ │ │ ├── materials
│ │ │ │ └── textures
│ │ │ │ │ └── drc_torso_head_diffuse.jpg
│ │ │ │ └── meshes
│ │ │ │ ├── head.dae
│ │ │ │ └── head_camera.dae
│ │ ├── scenes
│ │ │ └── stadium
│ │ │ │ ├── part0.obj
│ │ │ │ ├── part1.obj
│ │ │ │ ├── part2.obj
│ │ │ │ ├── plane100.obj
│ │ │ │ ├── plane100.urdf
│ │ │ │ ├── plane_stadium.sdf
│ │ │ │ ├── stadium.mtl
│ │ │ │ ├── stadium.obj
│ │ │ │ ├── stadium.sdf
│ │ │ │ └── stadium_grass.jpg
│ │ └── things
│ │ │ ├── cube_small.urdf
│ │ │ └── sphere_small.urdf
│ ├── gym_utils.py
│ ├── mujoco
│ │ ├── __init__.py
│ │ ├── envs
│ │ │ ├── __init__.py
│ │ │ ├── env_bases.py
│ │ │ ├── locomotion
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ant_env.py
│ │ │ │ ├── half_cheetah_env.py
│ │ │ │ ├── hopper_env.py
│ │ │ │ ├── humanoid_env.py
│ │ │ │ ├── walker2d_env.py
│ │ │ │ └── walker_base_env.py
│ │ │ ├── manipulation
│ │ │ │ ├── __init__.py
│ │ │ │ ├── pusher_env.py
│ │ │ │ ├── reacher_env.py
│ │ │ │ ├── striker_env.py
│ │ │ │ └── thrower_env.py
│ │ │ └── pendulum
│ │ │ │ ├── __init__.py
│ │ │ │ ├── inverted_double_pendulum_env.py
│ │ │ │ └── inverted_pendulum_env.py
│ │ ├── robots
│ │ │ ├── __init__.py
│ │ │ ├── locomotors
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ant.py
│ │ │ │ ├── half_cheetah.py
│ │ │ │ ├── hopper.py
│ │ │ │ ├── humanoid.py
│ │ │ │ ├── walker2d.py
│ │ │ │ └── walker_base.py
│ │ │ ├── manipulators
│ │ │ │ ├── __init__.py
│ │ │ │ ├── pusher.py
│ │ │ │ ├── reacher.py
│ │ │ │ ├── striker.py
│ │ │ │ └── thrower.py
│ │ │ ├── pendula
│ │ │ │ ├── __init__.py
│ │ │ │ ├── inverted_double_pendulum.py
│ │ │ │ └── inverted_pendulum.py
│ │ │ └── robot_bases.py
│ │ └── scenes
│ │ │ ├── __init__.py
│ │ │ ├── scene_bases.py
│ │ │ └── stadium.py
│ └── roboschool
│ │ ├── __init__.py
│ │ ├── envs
│ │ ├── __init__.py
│ │ ├── env_bases.py
│ │ ├── locomotion
│ │ │ ├── __init__.py
│ │ │ ├── ant_env.py
│ │ │ ├── atlas_env.py
│ │ │ ├── half_cheetah_env.py
│ │ │ ├── hopper_env.py
│ │ │ ├── humanoid_env.py
│ │ │ ├── humanoid_flagrun_env.py
│ │ │ ├── walker2d_env.py
│ │ │ └── walker_base_env.py
│ │ ├── manipulation
│ │ │ ├── __init__.py
│ │ │ ├── pusher_env.py
│ │ │ ├── reacher_env.py
│ │ │ ├── striker_env.py
│ │ │ └── thrower_env.py
│ │ └── pendulum
│ │ │ ├── __init__.py
│ │ │ ├── inverted_double_pendulum_env.py
│ │ │ └── inverted_pendulum_env.py
│ │ ├── robots
│ │ ├── __init__.py
│ │ ├── locomotors
│ │ │ ├── __init__.py
│ │ │ ├── ant.py
│ │ │ ├── atlas.py
│ │ │ ├── half_cheetah.py
│ │ │ ├── hopper.py
│ │ │ ├── humanoid.py
│ │ │ ├── humanoid_flagrun.py
│ │ │ ├── walker2d.py
│ │ │ └── walker_base.py
│ │ ├── manipulators
│ │ │ ├── __init__.py
│ │ │ ├── pusher.py
│ │ │ ├── reacher.py
│ │ │ ├── striker.py
│ │ │ └── thrower.py
│ │ ├── pendula
│ │ │ ├── __init__.py
│ │ │ ├── interted_pendulum.py
│ │ │ └── inverted_double_pendulum.py
│ │ └── robot_bases.py
│ │ └── scenes
│ │ ├── __init__.py
│ │ ├── scene_bases.py
│ │ └── stadium.py
├── examples
│ ├── __init__.py
│ ├── roboschool-weights
│ │ ├── __init__.py
│ │ ├── enjoy_TF_AntPyBulletEnv_v0_2017may.py
│ │ ├── enjoy_TF_AtlasPyBulletEnv_v0_2017jul.py
│ │ ├── enjoy_TF_HalfCheetahPyBulletEnv_v0_2017may.py
│ │ ├── enjoy_TF_HopperPyBulletEnv_v0_2017may.py
│ │ ├── enjoy_TF_HumanoidFlagrunHarderPyBulletEnv_v0_2017may.py
│ │ ├── enjoy_TF_HumanoidFlagrunHarderPyBulletEnv_v1_2017jul.py
│ │ ├── enjoy_TF_HumanoidFlagrunPyBulletEnv_v0_2017may.py
│ │ ├── enjoy_TF_HumanoidPyBulletEnv_v0_2017may.py
│ │ ├── enjoy_TF_InvertedDoublePendulumPyBulletEnv_v0_2017may.py
│ │ ├── enjoy_TF_InvertedPendulumPyBulletEnv_v0_2017may.py
│ │ ├── enjoy_TF_InvertedPendulumSwingupPyBulletEnv_v0_2017may.py
│ │ ├── enjoy_TF_ReacherPyBulletEnv_v0_2017may.py
│ │ └── enjoy_TF_Walker2DPyBulletEnv_v0_2017may.py
│ └── tensorforce
│ │ ├── __init__.py
│ │ ├── checkpoints
│ │ ├── mujoco-invdpendulumv2
│ │ │ ├── checkpoint
│ │ │ ├── ppo-mujoco-invdpendulumv2-58011633.data-00000-of-00001
│ │ │ ├── ppo-mujoco-invdpendulumv2-58011633.index
│ │ │ ├── ppo-mujoco-invdpendulumv2-58011633.meta
│ │ │ ├── ppo-mujoco-invdpendulumv2-58059089.data-00000-of-00001
│ │ │ ├── ppo-mujoco-invdpendulumv2-58059089.index
│ │ │ ├── ppo-mujoco-invdpendulumv2-58059089.meta
│ │ │ ├── ppo-mujoco-invdpendulumv2-58112398.data-00000-of-00001
│ │ │ ├── ppo-mujoco-invdpendulumv2-58112398.index
│ │ │ ├── ppo-mujoco-invdpendulumv2-58112398.meta
│ │ │ ├── ppo-mujoco-invdpendulumv2-58160617.data-00000-of-00001
│ │ │ ├── ppo-mujoco-invdpendulumv2-58160617.index
│ │ │ ├── ppo-mujoco-invdpendulumv2-58160617.meta
│ │ │ ├── ppo-mujoco-invdpendulumv2-58222240.data-00000-of-00001
│ │ │ ├── ppo-mujoco-invdpendulumv2-58222240.index
│ │ │ └── ppo-mujoco-invdpendulumv2-58222240.meta
│ │ └── mujoco-invpendulumv2
│ │ │ ├── checkpoint
│ │ │ ├── ppo-mujoco-invpendulumv2-12475216.data-00000-of-00001
│ │ │ ├── ppo-mujoco-invpendulumv2-12475216.index
│ │ │ ├── ppo-mujoco-invpendulumv2-12475216.meta
│ │ │ ├── ppo-mujoco-invpendulumv2-12555996.data-00000-of-00001
│ │ │ ├── ppo-mujoco-invpendulumv2-12555996.index
│ │ │ ├── ppo-mujoco-invpendulumv2-12555996.meta
│ │ │ ├── ppo-mujoco-invpendulumv2-12635251.data-00000-of-00001
│ │ │ ├── ppo-mujoco-invpendulumv2-12635251.index
│ │ │ ├── ppo-mujoco-invpendulumv2-12635251.meta
│ │ │ ├── ppo-mujoco-invpendulumv2-12706882.data-00000-of-00001
│ │ │ ├── ppo-mujoco-invpendulumv2-12706882.index
│ │ │ ├── ppo-mujoco-invpendulumv2-12706882.meta
│ │ │ ├── ppo-mujoco-invpendulumv2-12789838.data-00000-of-00001
│ │ │ ├── ppo-mujoco-invpendulumv2-12789838.index
│ │ │ └── ppo-mujoco-invpendulumv2-12789838.meta
│ │ ├── configs
│ │ ├── cnn_dqn2013_network.json
│ │ ├── cnn_dqn_network.json
│ │ ├── cnn_network_2048.json
│ │ ├── ddpg.json
│ │ ├── dqfd.json
│ │ ├── dqn.json
│ │ ├── dqn_ue4.json
│ │ ├── dqn_visual.json
│ │ ├── mlp2_embedding_network.json
│ │ ├── mlp2_lstm_network.json
│ │ ├── mlp2_network.json
│ │ ├── mlp2_normalized_network.json
│ │ ├── naf.json
│ │ ├── ppo.json
│ │ ├── ppo_cnn.json
│ │ ├── trpo.json
│ │ ├── vpg.json
│ │ └── vpg_baseline_visual.json
│ │ └── openai_gym.py
├── tests
│ ├── __init__.py
│ ├── roboschool
│ │ ├── __init__.py
│ │ └── agents
│ │ │ ├── AntPyBulletEnv_v0_2017may.py
│ │ │ ├── AtlasPyBulletEnv_v0_2017jul.py
│ │ │ ├── HalfCheetahPyBulletEnv_v0_2017may.py
│ │ │ ├── HopperPyBulletEnv_v0_2017may.py
│ │ │ ├── HumanoidFlagrunHarderPyBulletEnv_v0_2017may.py
│ │ │ ├── HumanoidFlagrunHarderPyBulletEnv_v1_2017jul.py
│ │ │ ├── HumanoidFlagrunPyBulletEnv_v0_2017may.py
│ │ │ ├── HumanoidPyBulletEnv_v0_2017may.py
│ │ │ ├── InvertedDoublePendulumPyBulletEnv_v0_2017may.py
│ │ │ ├── InvertedPendulumPyBulletEnv_v0_2017may.py
│ │ │ ├── InvertedPendulumSwingupPyBulletEnv_v0_2017may.py
│ │ │ ├── ReacherPyBulletEnv_v0_017may.py
│ │ │ ├── Walker2DPyBulletEnv_v0_2017may.py
│ │ │ ├── __init__.py
│ │ │ └── policies.py
│ ├── test_pybulletgym_mujoco_sanity.py
│ ├── test_pybulletgym_roboschool_performance.py
│ └── test_pybylletgym_roboschool_sanity.py
└── utils
│ ├── __init__.py
│ ├── kerasrl_utils.py
│ └── robot_dev_util.py
├── requirements.txt
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # intellij idea
104 | .idea/
105 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # pybullet-gym
2 |
3 | MIT License
4 |
5 | Copyright (c) 2018 Benjamin Ellenberger
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in all
15 | copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 |
25 | # MuJoCo models
26 |
27 | This work is derived from [MuJoCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license:
28 |
29 | ```
30 | This file is part of MuJoCo.
31 | Copyright 2009-2015 Roboti LLC.
32 | Mujoco :: Advanced physics simulation engine
33 | Source : www.roboti.us
34 | Version : 1.31
35 | Released : 23Apr16
36 | Author :: Vikash Kumar
37 | Contacts : kumar@roboti.us
38 | ```
39 |
40 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/__init__.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs import register # this is included to trigger env loading
--------------------------------------------------------------------------------
/pybulletgym/agents/__init__.py:
--------------------------------------------------------------------------------
# ---- register agents ----------
# NOTE(review): the original used the Python-2 implicit relative import
# `import agent_register`, which fails under Python 3; use an explicit
# relative import instead.
from . import agent_register

# agent_register.register(
#     id='BaselinesDQNAgent-v0',
#     entry_point='agents_baselines:BaselinesDQNAgent'
# )

# NOTE(review): entry points previously referenced 'pybullet_envs.agents...',
# which is a different package (pybullet's bundled envs); the kerasrl agents
# live in this package, so they are registered under 'pybulletgym.agents...'.
agent_register.register(
    id='KerasCEMAgent-v0',
    entry_point='pybulletgym.agents.agents_kerasrl:KerasCEMAgent'
)

agent_register.register(
    id='KerasDDPGAgent-v0',
    entry_point='pybulletgym.agents.agents_kerasrl:KerasDDPGAgent'
)

agent_register.register(
    id='KerasDDQNAgent-v0',
    entry_point='pybulletgym.agents.agents_kerasrl:KerasDDQNAgent'
)

agent_register.register(
    id='KerasDQNAgent-v0',
    entry_point='pybulletgym.agents.agents_kerasrl:KerasDQNAgent'
)

agent_register.register(
    id='KerasNAFAgent-v0',
    entry_point='pybulletgym.agents.agents_kerasrl:KerasNAFAgent'
)

# from .agents_baselines import BaselinesDQNAgent
from .agents_kerasrl import KerasCEMAgent, KerasDDPGAgent, KerasDDQNAgent, KerasDQNAgent, KerasNAFAgent
--------------------------------------------------------------------------------
/pybulletgym/agents/agent_register.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import pkg_resources
3 | import re
4 | from gym import error
5 | import warnings
6 |
7 | logger = logging.getLogger(__name__)
8 | # This format is true today, but it's *not* an official spec.
9 | # [username/](agent-name)-v(version): agent-name is group 1, version is group 2
10 | #
11 | # 2017-08-26: We're experimentally expanding the agent ID format
12 | # to include an optional username.
13 | agent_id_re = re.compile(r'^(?:[\w:-]+\/)?([\w:.-]+)-v(\d+)$')
14 |
def load(name):
    """Resolve an entry-point string ``pkg.module:attr[.attr...]`` to the object it names.

    Replaces the deprecated ``EntryPoint.load(False)`` call (its boolean
    argument was deprecated and later removed from setuptools) with the
    supported ``EntryPoint.resolve()``.
    """
    entry_point = pkg_resources.EntryPoint.parse('x={}'.format(name))
    result = entry_point.resolve()
    return result
19 |
class AgentSpec(object):
    """A specification for a particular instance of the agent. Used
    to register the parameters for official evaluations.

    Args:
        id (str): The official agent ID
        entry_point (Optional[str]): The Python entrypoint of the agent class (e.g. module.name:Class)
        kwargs (dict): The kwargs to pass to the agent class

    Attributes:
        id (str): The official agent ID
    """

    def __init__(self, id, entry_point=None, kwargs=None):
        self.id = id
        # Validate the ID format up front so a bad registration fails loudly.
        match = agent_id_re.search(id)
        if match is None:
            raise error.Error('Attempted to register malformed agent ID: {}. (Currently all IDs must be of the form {}.)'.format(id, agent_id_re.pattern))
        self._agent_name = match.group(1)
        self._entry_point = entry_point
        self._kwargs = kwargs if kwargs is not None else {}

    def make(self, **kwargs):
        """Instantiate an instance of the agent with the given kwargs."""
        # A spec without an entry point marks a deprecated agent.
        if self._entry_point is None:
            raise error.Error('Attempting to make deprecated agent {}. (HINT: is there a newer registered version of this agent?)'.format(self.id))
        agent_cls = load(self._entry_point)
        return agent_cls(**kwargs)

    def __repr__(self):
        return "AgentSpec({})".format(self.id)
58 |
class AgentRegistry(object):
    """Register an agent by ID and instantiate registered agents."""

    def __init__(self):
        # Maps agent ID -> AgentSpec.
        self.agent_specs = {}

    def make(self, id, **kwargs):
        """Instantiate the agent registered under `id`."""
        logger.info('Making new agent: %s', id)
        return self.spec(id).make(**kwargs)

    def all(self):
        """Return all registered AgentSpecs."""
        return self.agent_specs.values()

    def spec(self, id):
        """Look up the AgentSpec for `id`, with helpful errors on failure."""
        match = agent_id_re.search(id)
        if not match:
            raise error.Error('Attempted to look up malformed agent ID: {}. (Currently all IDs must be of the form {}.)'.format(id.encode('utf-8'), agent_id_re.pattern))

        if id in self.agent_specs:
            return self.agent_specs[id]

        # Unknown ID: check whether another version of the same agent exists
        # so we can point the caller at it.
        agent_name = match.group(1)
        matching_agents = [valid_id for valid_id, valid_spec in self.agent_specs.items()
                           if valid_spec._agent_name == agent_name]
        if matching_agents:
            raise error.DeprecatedEnv('Agent {} not found (valid versions include {})'.format(id, matching_agents))
        raise error.UnregisteredEnv('No registered agent with id: {}'.format(id))

    def register(self, id, **kwargs):
        """Register a new agent spec; re-registration is an error."""
        if id in self.agent_specs:
            raise error.Error('Cannot re-register id: {}'.format(id))
        self.agent_specs[id] = AgentSpec(id, **kwargs)
99 |
# Module-level singleton registry shared by the convenience functions below.
agent_registry = AgentRegistry()


def register(id, **kwargs):
    """Register an agent spec with the global registry."""
    return agent_registry.register(id, **kwargs)


def make(id, **kwargs):
    """Instantiate a registered agent from the global registry."""
    return agent_registry.make(id, **kwargs)


def spec(id):
    """Look up an AgentSpec in the global registry."""
    return agent_registry.spec(id)
112 |
--------------------------------------------------------------------------------
/pybulletgym/agents/agents_baselines.py:
--------------------------------------------------------------------------------
1 | from baselines import deepq
2 |
3 |
def add_opts(parser):
    """Hook for agent-specific command-line options; this agent adds none."""
    pass
6 |
7 |
class BaselinesDQNAgent(object):
    """DQN agent backed by the OpenAI `baselines` deepq implementation."""

    def __init__(self, opts):
        # This agent only supports discrete action spaces.
        self.metadata = {
            'discrete_actions': True,
        }
        self.opts = opts
        self.agent = None  # set by train() or load_weights()

    def configure(self, observation_space_shape, nb_actions):
        # deepq builds its own network from the env; nothing to configure here.
        pass

    def train(self, env, nb_steps, visualize, verbosity):
        """Train via deepq.learn and keep the resulting act function."""
        q_network = deepq.models.mlp([64])
        self.agent = deepq.learn(
            env,
            q_func=q_network,
            lr=1e-3,
            max_timesteps=nb_steps,
            buffer_size=50000,
            exploration_fraction=0.1,
            exploration_final_eps=0.02,
            print_freq=10 if verbosity else None,
            callback=env.render if visualize else None
        )

    def test(self, env, nb_episodes, visualize):
        """Roll out the trained policy for a number of episodes."""
        for _ in range(nb_episodes):
            obs, done = env.reset(), False
            episode_rew = 0
            while not done:
                if visualize:
                    env.render()
                obs, rew, done, _info = env.step(self.agent(obs[None])[0])
                episode_rew += rew
            print("Episode reward", episode_rew)

    def load_weights(self, load_file):
        self.agent = deepq.load(load_file)

    def save_weights(self, save_file, overwrite):
        self.agent.save(save_file)
--------------------------------------------------------------------------------
/pybulletgym/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 |
# Table of (env id, entry point, registration kwargs) for every environment
# this package provides. Registering through one loop keeps the calls uniform.
_ENV_SPECS = [
    # roboschool envs
    # -- pendula
    ('InvertedPendulumPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.pendulum.inverted_pendulum_env:InvertedPendulumBulletEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 950.0}),
    ('InvertedDoublePendulumPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.pendulum.inverted_double_pendulum_env:InvertedDoublePendulumBulletEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 9100.0}),
    ('InvertedPendulumSwingupPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.pendulum.inverted_pendulum_env:InvertedPendulumSwingupBulletEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 800.0}),
    # -- manipulators
    ('ReacherPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.manipulation.reacher_env:ReacherBulletEnv',
     {'max_episode_steps': 150, 'reward_threshold': 18.0}),
    ('PusherPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.manipulation.pusher_env:PusherBulletEnv',
     {'max_episode_steps': 150, 'reward_threshold': 18.0}),
    ('ThrowerPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.manipulation.thrower_env:ThrowerBulletEnv',
     {'max_episode_steps': 100, 'reward_threshold': 18.0}),
    ('StrikerPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.manipulation.striker_env:StrikerBulletEnv',
     {'max_episode_steps': 100, 'reward_threshold': 18.0}),
    # -- locomotors
    ('Walker2DPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.locomotion.walker2d_env:Walker2DBulletEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 2500.0}),
    ('HalfCheetahPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.locomotion.half_cheetah_env:HalfCheetahBulletEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 3000.0}),
    ('AntPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.locomotion.ant_env:AntBulletEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 2500.0}),
    ('HopperPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.locomotion.hopper_env:HopperBulletEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 2500.0}),
    ('HumanoidPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.locomotion.humanoid_env:HumanoidBulletEnv',
     {'max_episode_steps': 1000}),
    ('HumanoidFlagrunPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.locomotion.humanoid_flagrun_env:HumanoidFlagrunBulletEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 2000.0}),
    ('HumanoidFlagrunHarderPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.locomotion.humanoid_flagrun_env:HumanoidFlagrunHarderBulletEnv',
     {'max_episode_steps': 1000}),
    ('AtlasPyBulletEnv-v0',
     'pybulletgym.envs.roboschool.envs.locomotion.atlas_env:AtlasBulletEnv',
     {'max_episode_steps': 1000}),
    # mujoco envs
    ('InvertedPendulumMuJoCoEnv-v0',
     'pybulletgym.envs.mujoco.envs.pendulum.inverted_pendulum_env:InvertedPendulumMuJoCoEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 950.0}),
    ('InvertedDoublePendulumMuJoCoEnv-v0',
     'pybulletgym.envs.mujoco.envs.pendulum.inverted_double_pendulum_env:InvertedDoublePendulumMuJoCoEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 9100.0}),
    ('Walker2DMuJoCoEnv-v0',
     'pybulletgym.envs.mujoco.envs.locomotion.walker2d_env:Walker2DMuJoCoEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 2500.0}),
    ('HalfCheetahMuJoCoEnv-v0',
     'pybulletgym.envs.mujoco.envs.locomotion.half_cheetah_env:HalfCheetahMuJoCoEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 3000.0}),
    ('AntMuJoCoEnv-v0',
     'pybulletgym.envs.mujoco.envs.locomotion.ant_env:AntMuJoCoEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 2500.0}),
    ('HopperMuJoCoEnv-v0',
     'pybulletgym.envs.mujoco.envs.locomotion.hopper_env:HopperMuJoCoEnv',
     {'max_episode_steps': 1000, 'reward_threshold': 2500.0}),
    ('HumanoidMuJoCoEnv-v0',
     'pybulletgym.envs.mujoco.envs.locomotion.humanoid_env:HumanoidMuJoCoEnv',
     {'max_episode_steps': 1000}),
]

for _env_id, _env_entry_point, _env_kwargs in _ENV_SPECS:
    register(id=_env_id, entry_point=_env_entry_point, **_env_kwargs)
157 |
def get_list():
    """Return a bulleted list of all registered pybullet-gym environment ids.

    Fixes the original body, which referenced ``gym.pgym.envs.registry`` (a
    typo for ``gym.envs.registry``) and used the never-imported name ``gym``,
    so it raised at call time.
    """
    # Local import: this module is itself imported while gym loads envs, so
    # importing lazily avoids any chance of a circular import at module load.
    import gym
    envs = ['- ' + spec.id for spec in gym.envs.registry.all()
            if spec.id.find('Bullet') >= 0 or spec.id.find('MuJoCo') >= 0]
    return envs
161 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/ant.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/capsule.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/capsule_fromtoX.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/capsule_fromtoY.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/capsule_fromtoZ.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/cylinder.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/cylinder_fromtoX.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/cylinder_fromtoY.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/cylinder_fromtoZ.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/ground.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/ground_plane.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/half_cheetah.xml:
--------------------------------------------------------------------------------
1 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/hello_mjcf.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/hopper.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/inverted_double_pendulum.xml:
--------------------------------------------------------------------------------
1 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/inverted_pendulum.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/reacher.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/swimmer.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/mjcf/walker2d.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/drc_extremities_diffuse.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/drc_extremities_diffuse.jpg
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/extremities_diffuse_unplugged.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/extremities_diffuse_unplugged.jpg
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/right_leg_diffuse_unplugged.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/right_leg_diffuse_unplugged.jpg
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/torso_diffuse_unplugged.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/robots/atlas/atlas_description/materials/textures/torso_diffuse_unplugged.jpg
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/robots/atlas/atlas_description/multisense_sl_description/materials/textures/drc_torso_head_diffuse.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/robots/atlas/atlas_description/multisense_sl_description/materials/textures/drc_torso_head_diffuse.jpg
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/scenes/stadium/plane100.obj:
--------------------------------------------------------------------------------
1 | # Blender v2.66 (sub 1) OBJ File: ''
2 | # www.blender.org
3 | mtllib plane.mtl
4 | o Plane
5 | v 100.000000 -100.000000 0.000000
6 | v 100.000000 100.000000 0.000000
7 | v -100.000000 100.000000 0.000000
8 | v -100.000000 -100.000000 0.000000
9 |
10 | vt 100.000000 0.000000
11 | vt 100.000000 100.000000
12 | vt 0.000000 100.000000
13 | vt 0.000000 0.000000
14 |
15 |
16 |
17 | usemtl Material
18 | s off
19 | f 1/1 2/2 3/3
20 | f 1/1 3/3 4/4
21 |
22 |
23 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/scenes/stadium/plane100.urdf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/scenes/stadium/plane_stadium.sdf:
--------------------------------------------------------------------------------
1 |
2 |
3 | 0 0 -9.8
4 |
5 | 1
6 | 0 0 0 0 0 0
7 |
8 |
9 | 0
10 |
11 | 0.166667
12 | 0
13 | 0
14 | 0.166667
15 | 0
16 | 0.166667
17 |
18 |
19 |
20 |
21 |
22 | 0 0 1
23 | 100 100
24 |
25 |
26 |
27 |
28 |
29 |
30 | 1 1 1
31 | plane100.obj
32 |
33 |
34 |
35 | 1 1 1 1
36 | 1 1 1 1
37 | .5 .5 .5 1
38 | 0 0 0 0
39 |
40 |
41 |
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/scenes/stadium/stadium.mtl:
--------------------------------------------------------------------------------
1 | newmtl stadium_white
2 | Ns 96.078431
3 | Ka 0.000000 0.000000 0.000000
4 | Kd 1.000000 1.000000 1.000000
5 | Ks 0.500000 0.500000 0.500000
6 |
7 | newmtl stadium_grass
8 | Ka 0.000000 0.000000 0.000000
9 | Kd 0.000000 0.500000 0.000000
10 | Ks 0.000000 0.000000 0.000000
11 | map_Kd stadium_grass.jpg
12 |
13 | newmtl stadium_dirt
14 | Ka 0.000000 0.000000 0.000000
15 | Kd 0.600000 0.400000 0.000000
16 | Ks 0.000000 0.000000 0.000000
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/scenes/stadium/stadium.sdf:
--------------------------------------------------------------------------------
1 |
2 |
3 | 0 0 -9.8
4 |
5 | 1
6 | 0 0 0 0 0 0
7 |
8 |
9 | 0
10 |
11 | 0.166667
12 | 0
13 | 0
14 | 0.166667
15 | 0
16 | 0.166667
17 |
18 |
19 |
20 |
21 |
22 | 1 1 1
23 | part0.obj
24 |
25 |
26 |
27 | 1 1 1 1
28 | 1.00000 1.00000 1.000000 1
29 | 0.1 .1 .1 1
30 | 0 0 0 0
31 |
32 |
33 |
34 |
35 |
36 | 1
37 | 0 0 0 0 0 0
38 |
39 |
40 | 0
41 |
42 | 0.166667
43 | 0
44 | 0
45 | 0.166667
46 | 0
47 | 0.166667
48 |
49 |
50 |
51 |
52 |
53 | 0 0 1
54 | 100 100
55 |
56 |
57 |
58 |
59 |
60 |
61 | 1 1 1
62 | part1.obj
63 |
64 |
65 |
66 | 1 1 1 1
67 | 0.600000 0.400000 0.000000 1
68 | .5 .5 .5 1
69 | 0 0 0 0
70 |
71 |
72 |
73 |
74 |
75 | 1
76 | 0 0 0 0 0 0
77 |
78 |
79 | 0
80 |
81 | 0.166667
82 | 0
83 | 0
84 | 0.166667
85 | 0
86 | 0.166667
87 |
88 |
89 |
90 |
91 |
92 |
93 | 1 1 1
94 | part2.obj
95 |
96 |
97 |
98 | 1 0 0 1
99 | 0.000000 0.500000 0.000000 1
100 | 0.4 0.4 0.4 1
101 | 0 0 0 0
102 |
103 |
104 |
105 |
106 |
107 |
108 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/scenes/stadium/stadium_grass.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/assets/scenes/stadium/stadium_grass.jpg
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/things/cube_small.urdf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/pybulletgym/envs/assets/things/sphere_small.urdf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/pybulletgym/envs/gym_utils.py:
--------------------------------------------------------------------------------
import inspect
import os

from pybulletgym.envs.roboschool.robots.robot_bases import BodyPart

# Path hack: make this package's parent directory importable before pulling
# in pybullet_data (mirrors the pattern used across this repository).
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
os.sys.path.insert(0, parentdir)
import pybullet_data
10 |
11 |
def get_cube(p, x, y, z):
    """Spawn pybullet_data's small cube at (x, y, z) and wrap it as a BodyPart.

    ``p`` is a connected bullet client; the cube's base-link mass is bumped to
    1.2 to match Roboschool.
    """
    urdf = os.path.join(pybullet_data.getDataPath(), "cube_small.urdf")
    body = p.loadURDF(urdf, [x, y, z])
    p.changeDynamics(body, -1, mass=1.2)  # match Roboschool
    raw_name, _ = p.getBodyInfo(body)
    return BodyPart(p, raw_name.decode("utf8"), [body], 0, -1)
19 |
20 |
def get_sphere(p, x, y, z):
    """Spawn pybullet_data's collision-free red sphere at (x, y, z) as a BodyPart.

    ``p`` is a connected bullet client.
    """
    urdf = os.path.join(pybullet_data.getDataPath(), "sphere2red_nocol.urdf")
    body = p.loadURDF(urdf, [x, y, z])
    raw_name, _ = p.getBodyInfo(body)
    return BodyPart(p, raw_name.decode("utf8"), [body], 0, -1)
27 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/envs/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/env_bases.py:
--------------------------------------------------------------------------------
1 | import gym, gym.spaces, gym.utils, gym.utils.seeding
2 | import numpy as np
3 | import pybullet
4 | from pybullet_utils import bullet_client
5 |
6 | from pkg_resources import parse_version
7 |
8 |
class BaseBulletEnv(gym.Env):
    """
    Base class for Bullet physics simulation environments in a Scene.
    These environments create single-player scenes and behave like normal Gym environments, if
    you don't use multiplayer.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 60
    }

    def __init__(self, robot, render=False):
        # The physics client is created lazily in _reset(); -1 marks
        # "not connected yet".
        self.scene = None
        self.physicsClientId = -1
        self.ownsPhysicsClient = 0
        self.camera = Camera()
        self.isRender = render
        self.robot = robot
        self._seed()
        # Default camera parameters consumed by _render().
        self._cam_dist = 3
        self._cam_yaw = 0
        self._cam_pitch = -30
        self._render_width = 320
        self._render_height = 240

        # Expose the robot's spaces directly on the env (standard gym attributes).
        self.action_space = robot.action_space
        self.observation_space = robot.observation_space

    def configure(self, args):
        # Forward arbitrary configuration straight to the robot.
        self.robot.args = args

    def _seed(self, seed=None):
        """Seed the env RNG, share it with the robot, and return [seed] (gym convention)."""
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        self.robot.np_random = self.np_random  # use the same np_randomizer for robot as for env
        return [seed]

    def _reset(self):
        """Connect to a physics server if needed, rebuild the scene, reset the robot.

        Returns the robot's initial observation.
        """
        if self.physicsClientId < 0:
            # First reset: create (and own) the bullet client, GUI or headless.
            self.ownsPhysicsClient = True

            if self.isRender:
                self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
            else:
                self._p = bullet_client.BulletClient()

            self.physicsClientId = self._p._client
            self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI,0)

        if self.scene is None:
            # Subclasses must implement create_single_player_scene().
            self.scene = self.create_single_player_scene(self._p)
        if not self.scene.multiplayer and self.ownsPhysicsClient:
            self.scene.episode_restart(self._p)

        self.robot.scene = self.scene

        # Per-episode bookkeeping.
        self.frame = 0
        self.done = 0
        self.reward = 0
        dump = 0  # NOTE(review): unused local, kept for byte-compatibility
        s = self.robot.reset(self._p)
        self.potential = self.robot.calc_potential()
        return s

    def _render(self, mode, close=False):
        """Render the scene.

        Returns an RGB array for mode='rgb_array'; for any other mode only the
        GUI flag is toggled and an empty array is returned.
        """
        if mode == "human":
            self.isRender = True
        if mode != "rgb_array":
            return np.array([])

        # Track the robot's base if its position is known, else look at the origin.
        base_pos = [0, 0, 0]
        if hasattr(self, 'robot'):
            if hasattr(self.robot, 'body_xyz'):
                base_pos = self.robot.body_xyz

        view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
            cameraTargetPosition=base_pos,
            distance=self._cam_dist,
            yaw=self._cam_yaw,
            pitch=self._cam_pitch,
            roll=0,
            upAxisIndex=2)
        proj_matrix = self._p.computeProjectionMatrixFOV(
            fov=60, aspect=float(self._render_width)/self._render_height,
            nearVal=0.1, farVal=100.0)
        (_, _, px, _, _) = self._p.getCameraImage(
            width = self._render_width, height=self._render_height, viewMatrix=view_matrix,
            projectionMatrix=proj_matrix,
            renderer=pybullet.ER_BULLET_HARDWARE_OPENGL
        )
        rgb_array = np.array(px)
        rgb_array = rgb_array[:, :, :3]  # drop the alpha channel
        return rgb_array

    def _close(self):
        # Disconnect only if we created the client ourselves.
        if self.ownsPhysicsClient:
            if self.physicsClientId >= 0:
                self._p.disconnect()
        self.physicsClientId = -1

    def HUD(self, state, a, done):
        # Hook for subclasses / visualizers; intentionally a no-op here.
        pass

    # backwards compatibility for gym >= v0.9.x
    # for extension of this class.
    def step(self, *args, **kwargs):
        return self._step(*args, **kwargs)

    # Newer gym versions expect close/render/reset/seed without the leading
    # underscore, so alias the private implementations at class-creation time.
    if parse_version(gym.__version__)>=parse_version('0.9.6'):
        close = _close
        render = _render
        reset = _reset
        seed = _seed
122 |
123 |
class Camera:
    """Minimal debug-camera helper used by the environments."""

    def __init__(self):
        pass

    def move_and_look_at(self, i, j, k, x, y, z):
        # Point the debug visualizer at (x, y, z).
        # NOTE(review): self._p is never assigned anywhere on Camera, so
        # calling this raises AttributeError unless a bullet client is
        # attached to the instance externally — confirm intended usage.
        lookat = [x, y, z]
        distance = 10
        yaw = 10
        self._p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
133 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/locomotion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/envs/locomotion/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/locomotion/ant_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.envs.locomotion.walker_base_env import WalkerBaseMuJoCoEnv
2 | from pybulletgym.envs.mujoco.robots.locomotors.ant import Ant
3 |
4 |
class AntMuJoCoEnv(WalkerBaseMuJoCoEnv):
    """Quadruped Ant locomotion task using the MuJoCo-style Ant robot."""

    def __init__(self):
        self.robot = Ant()
        super().__init__(self.robot)
9 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/locomotion/half_cheetah_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.envs.locomotion.walker_base_env import WalkerBaseMuJoCoEnv
2 | from pybulletgym.envs.mujoco.robots.locomotors.half_cheetah import HalfCheetah
3 | import numpy as np
4 |
5 |
class HalfCheetahMuJoCoEnv(WalkerBaseMuJoCoEnv):
    """Planar HalfCheetah locomotion task (MuJoCo-style observations/reward)."""

    def __init__(self):
        self.robot = HalfCheetah()
        WalkerBaseMuJoCoEnv.__init__(self, self.robot)

    def step(self, a):
        """Apply action ``a``, advance physics, return (state, reward, done, info)."""
        # Single-player: drive this robot, then advance the shared scene once.
        # (In multiplayer the global step would happen once for all robots.)
        if not self.scene.multiplayer:
            self.robot.apply_action(a)
            self.scene.global_step()

        potential = self.robot.calc_potential()
        ctrl_cost = -0.1 * np.square(a).sum()  # quadratic control penalty
        state = self.robot.calc_state()

        done = False  # this task never terminates early

        debugmode = 0
        if debugmode:
            print("potential=")
            print(potential)
            print("power_cost=")
            print(ctrl_cost)

        self.rewards = [potential, ctrl_cost]
        if debugmode:
            print("rewards=")
            print(self.rewards)
            print("sum rewards")
            print(sum(self.rewards))
        self.HUD(state, a, done)
        step_reward = sum(self.rewards)
        self.reward += step_reward

        return state, step_reward, bool(done), {}
42 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/locomotion/hopper_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.envs.locomotion.walker_base_env import WalkerBaseMuJoCoEnv
2 | from pybulletgym.envs.mujoco.robots.locomotors.hopper import Hopper
3 | import numpy as np
4 |
5 |
class HopperMuJoCoEnv(WalkerBaseMuJoCoEnv):
    """Single-legged hopper locomotion task."""

    def __init__(self):
        self.robot = Hopper()
        WalkerBaseMuJoCoEnv.__init__(self, self.robot)

    def step(self, a):
        """Apply action ``a``, advance physics, return (state, reward, done, info)."""
        # Single-player: drive this robot, then advance the shared scene once.
        if not self.scene.multiplayer:
            self.robot.apply_action(a)
            self.scene.global_step()

        alive_bonus = 1.0
        potential = self.robot.calc_potential()
        ctrl_cost = -1e-3 * np.square(a).sum()
        state = self.robot.calc_state()

        height, ang = state[0], state[1]

        # Healthy while the state is finite, velocities are bounded, the torso
        # has not fallen and the pitch angle stays small
        # (height starts at 0 in pybullet).
        healthy = (np.isfinite(state).all()
                   and (np.abs(state[2:]) < 100).all()
                   and height > -0.3
                   and abs(ang) < .2)
        done = not healthy

        debugmode = 0
        if debugmode:
            print("potential=")
            print(potential)
            print("power_cost=")
            print(ctrl_cost)

        self.rewards = [potential, alive_bonus, ctrl_cost]
        if debugmode:
            print("rewards=")
            print(self.rewards)
            print("sum rewards")
            print(sum(self.rewards))
        self.HUD(state, a, done)
        step_reward = sum(self.rewards)
        self.reward += step_reward

        return state, step_reward, bool(done), {}
49 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/locomotion/humanoid_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.envs.locomotion.walker_base_env import WalkerBaseMuJoCoEnv
2 | from pybulletgym.envs.mujoco.robots.locomotors.humanoid import Humanoid
3 |
4 |
class HumanoidMuJoCoEnv(WalkerBaseMuJoCoEnv):
    """Humanoid locomotion task; energy penalties scaled up for the larger robot."""

    def __init__(self, robot=None):
        # Allow subclasses (e.g. flagrun variants) to inject their own robot.
        self.robot = Humanoid() if robot is None else robot
        WalkerBaseMuJoCoEnv.__init__(self, self.robot)
        # The humanoid's actuators are much stronger, so scale the base costs.
        self.electricity_cost = 4.25 * WalkerBaseMuJoCoEnv.electricity_cost
        self.stall_torque_cost = 4.25 * WalkerBaseMuJoCoEnv.stall_torque_cost
11 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/locomotion/walker2d_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.envs.locomotion.walker_base_env import WalkerBaseMuJoCoEnv
2 | from pybulletgym.envs.mujoco.robots.locomotors.walker2d import Walker2D
3 | import numpy as np
4 |
5 |
class Walker2DMuJoCoEnv(WalkerBaseMuJoCoEnv):
    """Planar two-legged walker locomotion task."""

    def __init__(self):
        self.robot = Walker2D()
        WalkerBaseMuJoCoEnv.__init__(self, self.robot)

    def step(self, a):
        """Apply action ``a``, advance physics, return (state, reward, done, info)."""
        # Single-player: drive this robot, then advance the shared scene once.
        if not self.scene.multiplayer:
            self.robot.apply_action(a)
            self.scene.global_step()

        alive_bonus = 1.0
        potential = self.robot.calc_potential()
        ctrl_cost = -1e-3 * np.square(a).sum()
        state = self.robot.calc_state()

        height, ang = state[0], state[1]

        # Healthy while the state is finite, velocities are bounded, the torso
        # stays inside its height band and the pitch is moderate
        # (height starts at 0 in pybullet).
        healthy = (np.isfinite(state).all()
                   and (np.abs(state[2:]) < 100).all()
                   and (1.0 > height > -0.2)
                   and (-1.0 < ang < 1.0))
        done = not healthy

        debugmode = 0
        if debugmode:
            print("potential=")
            print(potential)
            print("power_cost=")
            print(ctrl_cost)

        self.rewards = [potential, alive_bonus, ctrl_cost]
        if debugmode:
            print("rewards=")
            print(self.rewards)
            print("sum rewards")
            print(sum(self.rewards))
        self.HUD(state, a, done)
        step_reward = sum(self.rewards)
        self.reward += step_reward

        return state, step_reward, bool(done), {}
49 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/manipulation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/envs/manipulation/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/manipulation/pusher_env.py:
--------------------------------------------------------------------------------
from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv
from pybulletgym.envs.mujoco.robots.manipulators.pusher import Pusher
from pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene
import numpy as np
4 |
5 |
class PusherBulletEnv(BaseBulletEnv):
    """Pusher manipulation task: a 7-DoF arm pushes an object toward a target.

    Reward per step is the change in potential plus electricity and
    stuck-joint penalties; the episode never terminates from within _step().

    Fix: this module used ``np.*`` throughout without importing numpy
    (every sibling env imports it); the import is added at module level.
    """

    def __init__(self):
        self.robot = Pusher()
        BaseBulletEnv.__init__(self, self.robot)

    def create_single_player_scene(self, bullet_client):
        # Gravity-on scene, 2 ms timestep, 5 physics sub-steps per env step.
        return SingleRobotEmptyScene(bullet_client, gravity=9.81, timestep=0.0020, frame_skip=5)

    def _step(self, a):
        """Apply action ``a`` and return (state, reward, done=False, info)."""
        self.robot.apply_action(a)
        self.scene.global_step()

        state = self.robot.calc_state()  # sets self.to_target_vec

        potential_old = self.potential
        self.potential = self.robot.calc_potential()

        # Per-joint velocities for the work (torque * angular velocity) estimate.
        joint_vel = np.array([
            self.robot.shoulder_pan_joint.get_velocity(),
            self.robot.shoulder_lift_joint.get_velocity(),
            self.robot.upper_arm_roll_joint.get_velocity(),
            self.robot.elbow_flex_joint.get_velocity(),
            self.robot.forearm_roll_joint.get_velocity(),
            self.robot.wrist_flex_joint.get_velocity(),
            self.robot.wrist_roll_joint.get_velocity()
        ])

        action_product = np.matmul(np.abs(a), np.abs(joint_vel))
        action_sum = np.sum(a)

        electricity_cost = (
            -0.10 * action_product  # work torque*angular_velocity
            - 0.01 * action_sum  # stall torque require some energy
        )

        # NOTE(review): the condition holds whenever |pos| < 1.01, i.e. almost
        # always; the intent was likely abs(|pos| - 1) < 0.01 (joint pinned at
        # a limit) — confirm before changing the reward scale.
        stuck_joint_cost = 0
        for j in self.robot.ordered_joints:
            if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
                stuck_joint_cost += -0.1

        self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost)]
        self.HUD(state, a, False)
        return state, sum(self.rewards), False, {}

    def calc_potential(self):
        # NOTE(review): self.to_target_vec is set on the robot by calc_state(),
        # not on this env; calling this method on the env would raise
        # AttributeError — confirm whether it is ever invoked.
        return -100 * np.linalg.norm(self.to_target_vec)

    def camera_adjust(self):
        # Look at a point halfway between the origin and the fingertip.
        x, y, z = self.robot.fingertip.pose().xyz()
        x *= 0.5
        y *= 0.5
        self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
57 | self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
58 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/manipulation/reacher_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv
2 | from pybulletgym.envs.mujoco.robots.manipulators.reacher import Reacher
3 | from pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene
4 | import numpy as np
5 |
6 |
class ReacherBulletEnv(BaseBulletEnv):
    """Two-joint reacher: move the fingertip to a target position."""

    def __init__(self):
        self.robot = Reacher()
        BaseBulletEnv.__init__(self, self.robot)

    def create_single_player_scene(self, bullet_client):
        # Gravity-free scene; one physics step per env step.
        return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0165, frame_skip=1)

    def _step(self, a):
        """Apply action ``a`` and return (state, reward, done=False, info)."""
        assert (not self.scene.multiplayer)
        self.robot.apply_action(a)
        self.scene.global_step()

        state = self.robot.calc_state()  # sets self.to_target_vec

        potential_old = self.potential
        self.potential = self.robot.calc_potential()

        # Energy penalty: mechanical work plus a small stall-torque term.
        work_term = np.abs(a[0] * self.robot.theta_dot) + np.abs(a[1] * self.robot.gamma_dot)
        stall_term = np.abs(a[0]) + np.abs(a[1])
        electricity_cost = -0.10 * work_term - 0.01 * stall_term

        # Penalize the elbow joint sitting right at its limit.
        stuck_joint_cost = -0.1 if np.abs(np.abs(self.robot.gamma) - 1) < 0.01 else 0.0

        self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost)]
        self.HUD(state, a, False)
        return state, sum(self.rewards), False, {}

    def camera_adjust(self):
        # Look at a point halfway between the origin and the fingertip.
        fx, fy, fz = self.robot.fingertip.pose().xyz()
        fx *= 0.5
        fy *= 0.5
        self.camera.move_and_look_at(0.3, 0.3, 0.3, fx, fy, fz)
39 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/manipulation/striker_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv
2 | from pybulletgym.envs.mujoco.robots.manipulators.striker import Striker
3 | from pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene
4 | import numpy as np
5 |
6 |
class StrikerBulletEnv(BaseBulletEnv):
    """Striker manipulation task: a 7-DoF arm strikes an object toward a target.

    Tracks the minimum object-to-target distance seen so far and whether the
    fingertip has already struck the object.
    """

    def __init__(self):
        self.robot = Striker()
        BaseBulletEnv.__init__(self, self.robot)
        self._striked = False  # True once the fingertip has touched the object
        self._min_strike_dist = np.inf  # smallest object-target distance so far
        self.strike_threshold = 0.1  # fingertip-object distance counting as a strike

    def create_single_player_scene(self, bullet_client):
        # Gravity-on scene, 2 ms timestep, 5 physics sub-steps per env step.
        return SingleRobotEmptyScene(bullet_client, gravity=9.81, timestep=0.0020, frame_skip=5)

    def _step(self, a):
        """Apply action ``a`` and return (state, reward, done=False, info)."""
        self.robot.apply_action(a)
        self.scene.global_step()

        state = self.robot.calc_state()  # sets self.to_target_vec

        potential_old = self.potential
        self.potential = self.robot.calc_potential()

        # Per-joint velocities for the work (torque * angular velocity) estimate.
        joint_vel = np.array([
            self.robot.shoulder_pan_joint.get_velocity(),
            self.robot.shoulder_lift_joint.get_velocity(),
            self.robot.upper_arm_roll_joint.get_velocity(),
            self.robot.elbow_flex_joint.get_velocity(),
            # NOTE(review): upper_arm_roll_joint is read twice; the sibling
            # Pusher env reads forearm_roll_joint in this slot — likely a
            # copy-paste bug, confirm against the Striker robot definition.
            self.robot.upper_arm_roll_joint.get_velocity(),
            self.robot.wrist_flex_joint.get_velocity(),
            self.robot.wrist_roll_joint.get_velocity()
        ])

        action_product = np.matmul(np.abs(a), np.abs(joint_vel))
        action_sum = np.sum(a)

        electricity_cost = (
            -0.10 * action_product  # work torque*angular_velocity
            - 0.01 * action_sum  # stall torque require some energy
        )

        # NOTE(review): the condition holds whenever |pos| < 1.01 (almost
        # always); intent was probably abs(|pos| - 1) < 0.01 — confirm.
        stuck_joint_cost = 0
        for j in self.robot.ordered_joints:
            if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
                stuck_joint_cost += -0.1

        dist_object_finger = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz()
        reward_dist_vec = self.robot.object.pose().xyz() - self.robot.target.pose().xyz()  # TODO: Should the object and target really belong to the robot? Maybe split this off

        # Track the closest the object has ever been to the target.
        self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(reward_dist_vec))

        if np.linalg.norm(dist_object_finger) < self.strike_threshold:
            self._striked = True
            self._strike_pos = self.robot.fingertip.pose().xyz()

        # Before the strike, pull the fingertip toward the object; afterwards,
        # measure the object relative to where the strike happened.
        if self._striked:
            reward_near_vec = self.robot.object.pose().xyz() - self._strike_pos
        else:
            reward_near_vec = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz()

        reward_near = - np.linalg.norm(reward_near_vec)

        reward_dist = - np.linalg.norm(self._min_strike_dist)
        reward_ctrl = - np.square(a).sum()
        self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost),
                        3 * reward_dist, 0.1 * reward_ctrl, 0.5 * reward_near]
        self.HUD(state, a, False)
        return state, sum(self.rewards), False, {}

    def calc_potential(self):
        # NOTE(review): self.to_target_vec lives on the robot, not this env;
        # calling this method on the env would raise AttributeError — confirm.
        return -100 * np.linalg.norm(self.to_target_vec)

    def camera_adjust(self):
        # Look at a point halfway between the origin and the fingertip.
        x, y, z = self.robot.fingertip.pose().xyz()
        x *= 0.5
        y *= 0.5
        self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
81 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/manipulation/thrower_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv
2 | from pybulletgym.envs.mujoco.robots.manipulators.thrower import Thrower
3 | from pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene
4 | import numpy as np
5 |
6 |
class ThrowerBulletEnv(BaseBulletEnv):
    """Thrower manipulation task: a 7-DoF arm throws an object at a target.

    The distance reward switches from the live object position to the latched
    landing position once the object has hit the ground.
    """

    def __init__(self):
        self.robot = Thrower()
        BaseBulletEnv.__init__(self, self.robot)

    def create_single_player_scene(self, bullet_client):
        # Gravity-free scene, 2 ms timestep, 5 physics sub-steps per env step.
        return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0020, frame_skip=5)

    def _step(self, a):
        """Apply action ``a`` and return (state, reward, done=False, info)."""
        self.robot.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.to_target_vec

        potential_old = self.potential
        self.potential = self.robot.calc_potential()

        # Per-joint velocities for the work (torque * angular velocity) estimate.
        joint_vel = np.array([
            self.robot.shoulder_pan_joint.get_velocity(),
            self.robot.shoulder_lift_joint.get_velocity(),
            self.robot.upper_arm_roll_joint.get_velocity(),
            self.robot.elbow_flex_joint.get_velocity(),
            # NOTE(review): upper_arm_roll_joint is read twice; the sibling
            # Pusher env reads forearm_roll_joint in this slot — likely a
            # copy-paste bug, confirm against the Thrower robot definition.
            self.robot.upper_arm_roll_joint.get_velocity(),
            self.robot.wrist_flex_joint.get_velocity(),
            self.robot.wrist_roll_joint.get_velocity()
        ])

        action_product = np.matmul(np.abs(a), np.abs(joint_vel))
        action_sum = np.sum(a)

        electricity_cost = (
            -0.10 * action_product  # work torque*angular_velocity
            - 0.01 * action_sum  # stall torque require some energy
        )

        # NOTE(review): the condition holds whenever |pos| < 1.01 (almost
        # always); intent was probably abs(|pos| - 1) < 0.01 — confirm.
        stuck_joint_cost = 0
        for j in self.robot.ordered_joints:
            if np.abs(j.current_relative_position()[0]) - 1 < 0.01:
                stuck_joint_cost += -0.1

        object_xy = self.robot.object.pose().xyz()[:2]
        target_xy = self.robot.target.pose().xyz()[:2]

        # Latch the landing position the first time the object falls below z = -0.25.
        if not self.robot._object_hit_ground and self.robot.object.pose().xyz()[2] < -0.25:  # TODO: Should the object and target really belong to the robot? Maybe split this off
            self.robot._object_hit_ground = True
            self.robot._object_hit_location = self.robot.object.pose().xyz()

        if self.robot._object_hit_ground:
            object_hit_xy = self.robot._object_hit_location[:2]
            reward_dist = -np.linalg.norm(object_hit_xy - target_xy)
        else:
            reward_dist = -np.linalg.norm(object_xy - target_xy)
        reward_ctrl = - np.square(a).sum()

        self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost),
                        reward_dist, 0.002 * reward_ctrl]
        self.HUD(state, a, False)
        return state, sum(self.rewards), False, {}

    def camera_adjust(self):
        # Look at a point halfway between the origin and the fingertip.
        x, y, z = self.robot.fingertip.pose().xyz()
        x *= 0.5
        y *= 0.5
        self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
70 |
71 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/pendulum/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/envs/pendulum/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/pendulum/inverted_double_pendulum_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv
2 | from pybulletgym.envs.mujoco.robots.pendula.inverted_double_pendulum import InvertedDoublePendulum
3 | from pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene
4 |
5 |
class InvertedDoublePendulumMuJoCoEnv(BaseBulletEnv):
    """Inverted double pendulum on a cart; keep the second pole's tip upright."""

    def __init__(self):
        self.robot = InvertedDoublePendulum()
        BaseBulletEnv.__init__(self, self.robot)
        self.stateId = -1  # cached bullet saveState id; -1 = not saved yet

    def create_single_player_scene(self, bullet_client):
        # Gravity-on scene; one physics step per env step.
        return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1)

    def reset(self):
        """Reset via a cached physics snapshot when available (faster than full reload)."""
        if self.stateId >= 0:
            self._p.restoreState(self.stateId)
        r = BaseBulletEnv._reset(self)
        if self.stateId < 0:
            # Cache the freshly initialized state for subsequent resets.
            self.stateId = self._p.saveState()
        return r

    def step(self, a):
        """Apply action ``a``; reward = alive bonus - distance penalty - velocity penalty."""
        self.robot.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()
        # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9
        # using tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3
        pos_x, _, pos_y = self.robot.pole2.pose().xyz()
        dist_penalty = 0.01 * pos_x ** 2 + (pos_y + 0.3 - 2) ** 2
        # [1] of current_position() is the joint velocity component.
        v1, v2 = self.robot.j1.current_position()[1], self.robot.j2.current_position()[1]
        vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
        alive_bonus = 10
        # Episode ends once the (offset-corrected) tip height drops to 1 or below.
        done = pos_y + 0.3 <= 1
        self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)]
        self.HUD(state, a, done)
        return state, sum(self.rewards), done, {}

    def camera_adjust(self):
        self.camera.move_and_look_at(0, 1.2, 1.2, 0, 0, 0.5)
41 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/envs/pendulum/inverted_pendulum_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.envs.env_bases import BaseBulletEnv
2 | from pybulletgym.envs.mujoco.robots.pendula.inverted_pendulum import InvertedPendulum
3 | from pybulletgym.envs.mujoco.scenes.scene_bases import SingleRobotEmptyScene
4 | import numpy as np
5 |
6 |
class InvertedPendulumMuJoCoEnv(BaseBulletEnv):
    """MuJoCo-style inverted pendulum (cart-pole) balancing environment."""

    def __init__(self):
        self.robot = InvertedPendulum()
        BaseBulletEnv.__init__(self, self.robot)
        self.stateId = -1  # bullet saved-state handle; -1 until the first reset saves one

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1)

    def reset(self):
        # Restoring a saved bullet state is much faster than re-loading the MJCF model.
        if self.stateId >= 0:
            self._p.restoreState(self.stateId)
        r = BaseBulletEnv._reset(self)
        if self.stateId < 0:
            self.stateId = self._p.saveState()
        return r

    def step(self, a):
        self.robot.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.pos_x self.pos_y
        # Constant survival reward; episode terminates when the state becomes
        # non-finite or the pole angle exceeds 0.2 rad.
        # (Removed unused `vel_penalty = 0` local that was never referenced.)
        reward = 1.0
        done = not np.isfinite(state).all() or np.abs(state[1]) > .2
        self.rewards = [float(reward)]
        self.HUD(state, a, done)
        return state, sum(self.rewards), done, {}

    def camera_adjust(self):
        self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)
39 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/robots/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/locomotors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/robots/locomotors/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/locomotors/ant.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.robots.locomotors.walker_base import WalkerBase
2 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot
3 | import numpy as np
4 |
5 |
class Ant(WalkerBase, MJCFBasedRobot):
    """MuJoCo-style Ant quadruped walker."""

    foot_list = ['front_left_foot', 'front_right_foot', 'left_back_foot', 'right_back_foot']

    def __init__(self):
        WalkerBase.__init__(self, power=2.5)
        MJCFBasedRobot.__init__(self, "ant.xml", "torso", action_dim=8, obs_dim=111)

    def calc_state(self):
        # Refresh base-class bookkeeping (body pose, joint speeds, ...).
        WalkerBase.calc_state(self)

        torso = self.parts['torso']
        joint_positions = [j.get_position() for j in self.ordered_joints]
        qpos = np.hstack((torso.get_pose(), joint_positions)).flatten()  # shape (15,)

        velocity = torso.get_velocity()
        joint_velocities = [j.get_velocity() for j in self.ordered_joints]
        qvel = np.hstack((velocity[0], velocity[1], joint_velocities)).flatten()  # shape (14,)

        # External contact forces are not exposed here; zeros keep the MuJoCo
        # observation layout. shape (14, 6) # TODO: FIND cfrc_ext
        cfrc_ext = np.zeros((14, 6))
        return np.concatenate([
            qpos.flat[2:],                   # self.sim.data.qpos.flat[2:]
            qvel.flat,                       # self.sim.data.qvel.flat
            np.clip(cfrc_ext, -1, 1).flat    # np.clip(self.sim.data.cfrc_ext, -1, 1).flat
        ])

    def alive_bonus(self, z, pitch):
        # 0.25 is central sphere rad, die if it scrapes the ground
        return +1 if z > 0.26 else -1
30 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/locomotors/half_cheetah.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.robots.locomotors.walker_base import WalkerBase
2 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot
3 | import numpy as np
4 |
5 |
class HalfCheetah(WalkerBase, MJCFBasedRobot):
    """
    Half Cheetah implementation based on MuJoCo.
    """
    foot_list = ["ffoot", "fshin", "fthigh", "bfoot", "bshin", "bthigh"]  # track these contacts with ground

    def __init__(self):
        WalkerBase.__init__(self, power=1)
        MJCFBasedRobot.__init__(self, "half_cheetah.xml", "torso", action_dim=6, obs_dim=17, add_ignored_joints=True)

        self.pos_after = 0  # torso x position recorded at the previous potential evaluation

    def calc_state(self):
        """Return the 17-dim MuJoCo-style observation: qpos[1:] followed by qvel."""
        qpos = np.array([j.get_position() for j in self.ordered_joints], dtype=np.float32).flatten()  # shape (9,)
        qvel = np.array([j.get_velocity() for j in self.ordered_joints], dtype=np.float32).flatten()  # shape (9,)

        return np.concatenate([
            qpos.flat[1:],  # self.sim.data.qpos.flat[1:]
            qvel.flat       # self.sim.data.qvel.flat
        ])

    def calc_potential(self):
        # Progress in the potential field is speed*dt: forward displacement per
        # env step divided by dt. Typical speed is 2-3 m/s, so per-frame rewards
        # stay close to 1.0. (Removed the always-off `debugmode` print block.)
        pos_before = self.pos_after
        self.pos_after = self.robot_body.get_pose()[0]
        return (self.pos_after - pos_before) / self.scene.dt

    def robot_specific_reset(self, bullet_client):
        WalkerBase.robot_specific_reset(self, bullet_client)
        # Only the part objects are needed, so iterate values() instead of
        # items() (the key was unused).
        for part in self.parts.values():
            self._p.changeDynamics(part.bodyIndex, part.bodyPartIndex, lateralFriction=0.8, spinningFriction=0.1, rollingFriction=0.1, restitution=0.5)

        # Per-joint torque gains matching the MuJoCo half_cheetah actuators.
        self.jdict["bthigh"].power_coef = 120.0
        self.jdict["bshin"].power_coef = 90.0
        self.jdict["bfoot"].power_coef = 60.0
        self.jdict["fthigh"].power_coef = 140.0
        self.jdict["fshin"].power_coef = 60.0
        self.jdict["ffoot"].power_coef = 30.0
55 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/locomotors/hopper.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.robots.locomotors.walker_base import WalkerBase
2 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot
3 | import numpy as np
4 |
5 |
class Hopper(WalkerBase, MJCFBasedRobot):
    """
    Hopper implementation based on MuJoCo.
    """
    foot_list = ["foot"]

    def __init__(self):
        WalkerBase.__init__(self, power=0.75)
        MJCFBasedRobot.__init__(self, "hopper.xml", "torso", action_dim=3, obs_dim=11, add_ignored_joints=True)

        self.pos_after = 0  # torso x coordinate from the previous potential evaluation

    def calc_state(self):
        """Build the 11-dim observation: qpos[1:] plus velocities clipped to [-10, 10]."""
        joint_pos = [j.get_position() for j in self.ordered_joints]
        joint_vel = [j.get_velocity() for j in self.ordered_joints]
        qpos = np.array(joint_pos, dtype=np.float32).flatten()  # shape (6,)
        qvel = np.array(joint_vel, dtype=np.float32).flatten()  # shape (6,)

        return np.concatenate([
            qpos.flat[1:],               # self.sim.data.qpos.flat[1:]
            np.clip(qvel, -10, 10).flat  # self.sim.data.qvel.flat
        ])

    def calc_potential(self):
        # Potential is forward velocity: displacement per env step divided by dt.
        # Typical speeds of 2-3 m/s keep per-frame rewards close to 1.0.
        previous = self.pos_after
        self.pos_after = self.robot_body.get_pose()[0]
        return (self.pos_after - previous) / self.scene.dt
43 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/locomotors/humanoid.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.robots.locomotors.walker_base import WalkerBase
2 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot
3 | import numpy as np
4 |
5 |
class Humanoid(WalkerBase, MJCFBasedRobot):
    """MuJoCo-style humanoid walker with 17 actuated joints."""

    self_collision = True
    foot_list = ["right_foot", "left_foot"]  # "left_hand", "right_hand"

    def __init__(self, random_yaw=False, random_lean=False):
        WalkerBase.__init__(self, power=0.41)
        MJCFBasedRobot.__init__(self, 'humanoid_symmetric.xml', 'torso', action_dim=17, obs_dim=376)
        # 17 joints, 4 of them important for walking (hip, knee), others may as well be turned off, 17/4 = 4.25
        self.random_yaw = random_yaw
        self.random_lean = random_lean

    def robot_specific_reset(self, bullet_client):
        WalkerBase.robot_specific_reset(self, bullet_client)
        self.motor_names = ["abdomen_z", "abdomen_y", "abdomen_x"]
        self.motor_power = [100, 100, 100]
        self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"]
        self.motor_power += [100, 100, 300, 200]
        self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"]
        self.motor_power += [100, 100, 300, 200]
        self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"]
        self.motor_power += [75, 75, 75]
        self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"]
        self.motor_power += [75, 75, 75]
        self.motors = [self.jdict[n] for n in self.motor_names]
        if self.random_yaw:
            position = [0, 0, 0]
            orientation = [0, 0, 0]
            yaw = self.np_random.uniform(low=-3.14, high=3.14)
            if self.random_lean and self.np_random.randint(2) == 0:
                if self.np_random.randint(2) == 0:
                    pitch = np.pi/2
                    position = [0, 0, 0.45]
                else:
                    pitch = np.pi*3/2
                    position = [0, 0, 0.25]
                roll = 0
                orientation = [roll, pitch, yaw]
            else:
                position = [0, 0, 1.4]
                orientation = [0, 0, yaw]  # just face random direction, but stay straight otherwise
            self.robot_body.reset_position(position)
            # BUGFIX: was `p.getQuaternionFromEuler(...)` with no `p` in scope,
            # raising NameError whenever random_yaw is enabled; use the bullet
            # client stored on self instead.
            self.robot_body.reset_orientation(self._p.getQuaternionFromEuler(orientation))
        self.initial_z = 0.8

    def calc_state(self):
        # Refresh base-class bookkeeping (body pose, joint speeds, ...).
        WalkerBase.calc_state(self)

        pose = self.parts['torso'].get_pose()
        qpos = np.hstack((pose, [j.get_position() for j in self.ordered_joints])).flatten()  # shape (24,)

        velocity = self.parts['torso'].get_velocity()
        qvel = np.hstack((velocity[0], velocity[1], [j.get_velocity() for j in self.ordered_joints])).flatten()  # shape (23,)

        # The following MuJoCo quantities are not exposed by pybullet here;
        # zeros keep the 376-dim observation layout intact.
        cinert = np.zeros((14, 10))  # shape (14, 10) # TODO: FIND
        cvel = np.zeros((14, 6))  # shape (14, 6) # TODO: FIND
        qfrc_actuator = np.zeros(23)  # shape (23,) # TODO: FIND
        cfrc_ext = np.zeros((14, 6))  # shape (14, 6) # TODO: FIND cfrc_ext
        return np.concatenate([
            qpos.flat[2:],       # self.sim.data.qpos.flat[2:]
            qvel.flat,           # self.sim.data.qvel.flat
            cinert.flat,         # data.cinert.flat
            cvel.flat,           # data.cvel.flat
            qfrc_actuator.flat,  # data.qfrc_actuator.flat
            cfrc_ext.flat        # data.cfrc_ext.flat
        ])

    def apply_action(self, a):
        """Apply clipped torques from action vector `a` (17 entries) to all motors."""
        assert(np.isfinite(a).all())
        force_gain = 1
        for i, m, power in zip(range(17), self.motors, self.motor_power):
            m.set_motor_torque(float(force_gain * power * self.power * np.clip(a[i], -1, +1)))

    def alive_bonus(self, z, pitch):
        # 2 here because 17 joints produce a lot of electricity cost just from policy noise, living must be better than dying
        return +2 if z > 0.78 else -1
80 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/locomotors/walker2d.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.robots.locomotors.walker_base import WalkerBase
2 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot
3 | import numpy as np
4 |
5 |
class Walker2D(WalkerBase, MJCFBasedRobot):
    """
    Walker2D implementation based on MuJoCo.
    """
    foot_list = ["foot", "foot_left"]

    def __init__(self):
        WalkerBase.__init__(self, power=0.40)
        MJCFBasedRobot.__init__(self, "walker2d.xml", "torso", action_dim=6, obs_dim=17, add_ignored_joints=True)

        self.pos_after = 0  # torso x coordinate from the previous potential evaluation

    def calc_state(self):
        """17-dim observation: qpos[1:] followed by velocities clipped to [-10, 10]."""
        joint_pos = [j.get_position() for j in self.ordered_joints]
        joint_vel = [j.get_velocity() for j in self.ordered_joints]
        qpos = np.array(joint_pos, dtype=np.float32).flatten()  # shape (9,)
        qvel = np.array(joint_vel, dtype=np.float32).flatten()  # shape (9,)

        return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)])

    def calc_potential(self):
        # Potential is forward velocity: displacement per env step divided by dt.
        # Typical speeds of 2-3 m/s keep per-frame rewards close to 1.0.
        previous = self.pos_after
        self.pos_after = self.robot_body.get_pose()[0]
        return (self.pos_after - previous) / self.scene.dt

    def robot_specific_reset(self, bullet_client):
        WalkerBase.robot_specific_reset(self, bullet_client)
        # Both ankle joints use a reduced torque gain.
        for joint_name in ("foot_joint", "foot_left_joint"):
            self.jdict[joint_name].power_coef = 30.0
48 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/locomotors/walker_base.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.robots.robot_bases import XmlBasedRobot
2 | import numpy as np
3 |
4 |
class WalkerBase(XmlBasedRobot):
    """Shared base for legged walkers: pose bookkeeping, torque control, potential."""

    def __init__(self, power):
        self.power = power  # global torque scale, multiplied by each joint's power_coef
        self.camera_x = 0
        self.start_pos_x, self.start_pos_y, self.start_pos_z = 0, 0, 0
        self.walk_target_x = 1e3  # kilometers away
        self.walk_target_y = 0
        self.body_xyz = [0, 0, 0]

    def robot_specific_reset(self, bullet_client):
        self._p = bullet_client
        # Small random joint perturbation so every episode starts slightly differently.
        for joint in self.ordered_joints:
            joint.reset_current_position(self.np_random.uniform(low=-0.1, high=0.1), 0)

        self.feet = [self.parts[name] for name in self.foot_list]
        self.feet_contact = np.array([0.0] * len(self.foot_list), dtype=np.float32)
        self.scene.actor_introduce(self)
        self.initial_z = None  # captured lazily on the first calc_state() call

    def apply_action(self, a):
        assert np.isfinite(a).all()
        skipped = 0  # number of zero-power (ignored) joints seen so far
        for n, joint in enumerate(self.ordered_joints):
            if joint.power_coef == 0:
                # In case the ignored joints are added, they have 0 power and
                # consume no action entry.
                skipped += 1
            else:
                torque = self.power * joint.power_coef * float(np.clip(a[n - skipped], -1, +1))
                joint.set_motor_torque(torque)

    def calc_state(self):
        j = np.array([joint.current_relative_position() for joint in self.ordered_joints], dtype=np.float32).flatten()
        # even elements [0::2]: positions, scaled to -1..+1 between limits
        # odd elements [1::2]: angular speeds, scaled to roughly -1..+1
        self.joint_speeds = j[1::2]
        self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99)

        body_pose = self.robot_body.pose()
        parts_xyz = np.array([part.pose().xyz() for part in self.parts.values()]).flatten()
        # torso z is more informative than the mean z of all parts
        self.body_xyz = (parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2])
        self.body_rpy = body_pose.rpy()
        z = self.body_xyz[2]
        if self.initial_z is None:
            self.initial_z = z
        roll, pitch, yaw = self.body_rpy
        dx = self.walk_target_x - self.body_xyz[0]
        dy = self.walk_target_y - self.body_xyz[1]
        self.walk_target_theta = np.arctan2(dy, dx)
        self.walk_target_dist = np.linalg.norm([dy, dx])
        angle_to_target = self.walk_target_theta - yaw

        # Rotate world-frame speed back into the body's yaw frame.
        rot = np.array([[np.cos(-yaw), -np.sin(-yaw), 0],
                        [np.sin(-yaw), np.cos(-yaw), 0],
                        [0, 0, 1]])
        vx, vy, vz = np.dot(rot, self.robot_body.speed())

        more = np.array([z - self.initial_z,
                         np.sin(angle_to_target), np.cos(angle_to_target),
                         0.3 * vx, 0.3 * vy, 0.3 * vz,  # 0.3 just scales typical speed into -1..+1, no physical sense here
                         roll, pitch], dtype=np.float32)
        return np.clip(np.concatenate([more, j, self.feet_contact]), -5, +5)

    def calc_potential(self):
        # Progress in the potential field is speed*dt: typical speed is 2-3 m/s,
        # so the potential changes 2-3 per frame, keeping per-frame rewards near 1.0.
        try:
            return -self.walk_target_dist / self.scene.dt
        except AttributeError:
            # Scene (or its dt) not attached yet; fall back to the raw distance.
            return -self.walk_target_dist
85 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/manipulators/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/robots/manipulators/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/manipulators/pusher.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot
2 | import numpy as np
3 |
4 |
class Pusher(MJCFBasedRobot):
    """Seven-DoF arm that must push an object onto a randomly placed goal."""

    min_target_placement_radius = 0.5
    max_target_placement_radius = 0.8
    min_object_to_target_distance = 0.1
    max_object_to_target_distance = 0.4

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'pusher.xml', 'body0', action_dim=7, obs_dim=55)

    def robot_specific_reset(self, bullet_client):
        # parts
        self.fingertip = self.parts["tips_arm"]
        self.target = self.parts["goal"]
        self.object = self.parts["object"]

        # joints
        self.shoulder_pan_joint = self.jdict["r_shoulder_pan_joint"]
        self.shoulder_lift_joint = self.jdict["r_shoulder_lift_joint"]
        self.upper_arm_roll_joint = self.jdict["r_upper_arm_roll_joint"]
        self.elbow_flex_joint = self.jdict["r_elbow_flex_joint"]
        self.forearm_roll_joint = self.jdict["r_forearm_roll_joint"]
        self.wrist_flex_joint = self.jdict["r_wrist_flex_joint"]
        self.wrist_roll_joint = self.jdict["r_wrist_roll_joint"]

        self.target_pos = np.concatenate([
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1)
        ])

        # make length of vector between min and max_target_placement_radius
        self.target_pos = self.target_pos \
                          / np.linalg.norm(self.target_pos) \
                          * self.np_random.uniform(low=self.min_target_placement_radius,
                                                   high=self.max_target_placement_radius, size=1)

        self.object_pos = np.concatenate([
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1)
        ])

        # make length of vector between min and max_object_to_target_distance
        self.object_pos = self.object_pos \
                          / np.linalg.norm(self.object_pos - self.target_pos) \
                          * self.np_random.uniform(low=self.min_object_to_target_distance,
                                                   high=self.max_object_to_target_distance, size=1)

        # set position of objects
        self.zero_offset = np.array([0.45, 0.55])
        self.jdict["goal_slidex"].reset_current_position(self.target_pos[0] - self.zero_offset[0], 0)
        self.jdict["goal_slidey"].reset_current_position(self.target_pos[1] - self.zero_offset[1], 0)
        self.jdict["obj_slidex"].reset_current_position(self.object_pos[0] - self.zero_offset[0], 0)
        self.jdict["obj_slidey"].reset_current_position(self.object_pos[1] - self.zero_offset[1], 0)

        # randomize all joints TODO: Will this work or do we have to constrain this resetting in some way?
        self.shoulder_pan_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.shoulder_lift_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.elbow_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        # BUGFIX: this line reset upper_arm_roll_joint a second time while
        # forearm_roll_joint was never randomized.
        self.forearm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.wrist_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.wrist_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)

    def apply_action(self, a):
        """Map the 7 action entries onto the 7 arm joints (clipped, scaled torque)."""
        assert (np.isfinite(a).all())
        self.shoulder_pan_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1)))
        self.shoulder_lift_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1)))
        self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[2], -1, +1)))
        self.elbow_flex_joint.set_motor_torque(0.05 * float(np.clip(a[3], -1, +1)))
        # BUGFIX: a[4] was sent to upper_arm_roll_joint (again), leaving the
        # forearm roll unactuated.
        self.forearm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[4], -1, +1)))
        self.wrist_flex_joint.set_motor_torque(0.05 * float(np.clip(a[5], -1, +1)))
        self.wrist_roll_joint.set_motor_torque(0.05 * float(np.clip(a[6], -1, +1)))

    def calc_state(self):
        qpos = np.array([j.current_position() for j in self.ordered_joints])  # shape (11,)
        qvel = np.array([j.current_relative_position() for j in self.ordered_joints])  # shape (11,)
        tips_arm_body_com = self.fingertip.pose().xyz()  # shape (3,)
        object_body_com = self.object.pose().xyz()  # shape (3,)
        goal_body_com = self.target.pose().xyz()  # shape (3,)

        return np.concatenate([
            qpos.flat[:7],      # self.sim.data.qpos.flat[:7]
            qvel.flat[:7],      # self.sim.data.qvel.flat[:7]
            tips_arm_body_com,  # self.get_body_com("tips_arm")
            object_body_com,    # self.get_body_com("object")
            goal_body_com       # self.get_body_com("goal")
        ])
91 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/manipulators/reacher.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot
2 | import numpy as np
3 |
4 |
class Reacher(MJCFBasedRobot):
    """Two-link planar reacher with a randomly placed target."""

    TARG_LIMIT = 0.27

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'reacher.xml', 'body0', action_dim=2, obs_dim=9)

    def robot_specific_reset(self, bullet_client):
        # Drop the target at a random spot within the reachable square
        # (x drawn first, then y, preserving the RNG stream order).
        for axis in ("target_x", "target_y"):
            self.jdict[axis].reset_current_position(
                self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT), 0)
        self.fingertip = self.parts["fingertip"]
        self.target = self.parts["target"]
        self.central_joint = self.jdict["joint0"]
        self.elbow_joint = self.jdict["joint1"]
        # Randomize both arm joints over a full revolution.
        self.central_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.elbow_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)

    def apply_action(self, a):
        assert np.isfinite(a).all()
        self.central_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1)))
        self.elbow_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1)))

    def calc_state(self):
        target_x, target_vx = self.jdict["target_x"].current_position()
        target_y, target_vy = self.jdict["target_y"].current_position()

        qpos = np.array([j.current_position() for j in self.ordered_joints])  # shape (4,)
        qvel = np.array([j.current_relative_position()[1] for j in self.ordered_joints])  # shape (4,) # TODO: Add target pos and vel

        theta = qpos[:2]
        fingertip_xyz = np.array(self.fingertip.pose().xyz())
        target_xyz = np.array(self.target.pose().xyz())
        self.to_target_vec = fingertip_xyz - target_xyz  # shape (3,)

        return np.concatenate([
            np.cos(theta),
            np.sin(theta),
            qpos.flat[2:],      # self.sim.data.qpos.flat[2:]
            qvel.flat[:2],      # self.sim.data.qvel.flat[:2]
            self.to_target_vec  # fingertip position minus target position
        ])

    def calc_potential(self):
        # Negative scaled distance to target; larger (closer to 0) is better.
        return -100 * np.linalg.norm(self.to_target_vec)
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/pendula/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/robots/pendula/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/pendula/inverted_double_pendulum.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot
2 | import numpy as np
3 |
4 |
class InvertedDoublePendulum(MJCFBasedRobot):
    """Cart with two stacked poles, actuated only at the cart slider."""

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'inverted_double_pendulum.xml', 'cart', action_dim=1, obs_dim=11)

    def robot_specific_reset(self, bullet_client):
        self._p = bullet_client
        self.pole2 = self.parts["pole2"]
        self.slider = self.jdict["slider"]
        self.j1 = self.jdict["hinge"]
        self.j2 = self.jdict["hinge2"]
        # Start both hinges slightly off upright so episodes differ.
        u = self.np_random.uniform(low=-.1, high=.1, size=[2])
        self.j1.reset_current_position(float(u[0]), 0)
        self.j2.reset_current_position(float(u[1]), 0)
        self.j1.set_motor_torque(0)
        self.j2.set_motor_torque(0)

    def apply_action(self, a):
        assert np.isfinite(a).all()
        self.slider.set_motor_torque(200 * float(np.clip(a[0], -1, +1)))

    def calc_state(self):
        x, vx = self.slider.current_position()
        theta, theta_dot = self.j1.current_position()
        gamma, gamma_dot = self.j2.current_position()

        assert np.isfinite(x)

        qpos = np.array([x, theta, gamma])  # shape (3,)
        qvel = np.array([vx, theta_dot, gamma_dot])  # shape (3,)
        qfrc_constraint = np.zeros(3)  # shape (3,) # TODO: FIND qfrc_constraint in pybullet
        observation_parts = [
            qpos[:1],                           # cart x position
            np.sin(qpos[1:]),                   # link angles (sin)
            np.cos(qpos[1:]),                   # link angles (cos)
            np.clip(qvel, -10, 10),             # clipped velocities
            np.clip(qfrc_constraint, -10, 10)   # constraint-force placeholder
        ]
        return np.concatenate(observation_parts).ravel()
42 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/robots/pendula/inverted_pendulum.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.mujoco.robots.robot_bases import MJCFBasedRobot
2 | import numpy as np
3 |
4 |
class InvertedPendulum(MJCFBasedRobot):
    """Cart-pole robot driven by a torque on the cart slider."""

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'inverted_pendulum.xml', 'cart', action_dim=1, obs_dim=4)

    def robot_specific_reset(self, bullet_client):
        self._p = bullet_client
        self.pole = self.parts["pole"]
        self.slider = self.jdict["slider"]
        self.j1 = self.jdict["hinge"]
        # Start the pole slightly off upright so episodes differ.
        u = self.np_random.uniform(low=-.1, high=.1)
        self.j1.reset_current_position(u, 0)
        self.j1.set_motor_torque(0)

    def apply_action(self, a):
        # Recover from non-finite actions instead of crashing: the previous
        # `assert np.isfinite(a).all()` made this fallback unreachable (and
        # asserts are stripped under python -O anyway).
        if not np.isfinite(a).all():
            print("a is inf")
            a[0] = 0
        self.slider.set_motor_torque(100*float(np.clip(a[0], -1, +1)))

    def calc_state(self):
        x, vx = self.slider.current_position()
        self.theta, theta_dot = self.j1.current_position()

        # Sanitize non-finite readings so downstream code always sees numbers;
        # the previous `assert np.isfinite(x)` made these branches unreachable.
        if not np.isfinite(x):
            print("x is inf")
            x = 0

        if not np.isfinite(vx):
            print("vx is inf")
            vx = 0

        if not np.isfinite(self.theta):
            print("theta is inf")
            self.theta = 0

        if not np.isfinite(theta_dot):
            print("theta_dot is inf")
            theta_dot = 0

        qpos = np.array([x, self.theta])  # shape (2,)
        qvel = np.array([vx, theta_dot])  # shape (2,)

        return np.concatenate([
            qpos,           # self.sim.data.qpos
            qvel]).ravel()  # self.sim.data.qvel
53 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/scenes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/mujoco/scenes/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/scenes/scene_bases.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | sys.path.append(os.path.dirname(__file__))
5 |
6 | import gym
7 |
8 |
class Scene:
    """A base class for single- and multiplayer scenes."""

    def __init__(self, bullet_client, gravity, timestep, frame_skip):
        self._p = bullet_client
        self.np_random, seed = gym.utils.seeding.np_random(None)
        self.timestep = timestep
        self.frame_skip = frame_skip
        self.dt = self.timestep * self.frame_skip  # simulated seconds per env step
        self.cpp_world = World(self._p, gravity, timestep, frame_skip)

        self.test_window_still_open = True  # or never opened
        self.human_render_detected = False  # if user wants render("human"), we open test window

        self.multiplayer_robots = {}

    def test_window(self):
        """Call this function every frame, to see what's going on. Not necessary in learning."""
        self.human_render_detected = True
        return self.test_window_still_open

    def actor_introduce(self, robot):
        """Register a robot after scene reset (multiplayer scenes only)."""
        if self.multiplayer:
            self.multiplayer_robots[robot.player_n] = robot

    def actor_is_active(self, robot):
        """
        Used by robots to see if they are free to exclusively put their HUD on the test window.
        Later can be used for click-focus robots.
        """
        return not self.multiplayer

    def episode_restart(self, bullet_client):
        """Overridden by specific scenes to reset their objects into start positions."""
        self.cpp_world.clean_everything()

    def global_step(self):
        """
        The idea is: apply motor torques for all robots, then call global_step(), then collect
        observations from robots using step() with the same action.
        """
        self.cpp_world.step(self.frame_skip)
54 |
55 |
class SingleRobotEmptyScene(Scene):
    """Empty non-multiplayer scene; gravity/timestep come from the Scene constructor."""
    multiplayer = False  # this class is used "as is" for InvertedPendulum, Reacher
58 |
59 |
class World:
    """Thin wrapper around the bullet client holding global physics parameters."""

    def __init__(self, bullet_client, gravity, timestep, frame_skip):
        self._p = bullet_client
        self.gravity = gravity
        self.timestep = timestep
        self.frame_skip = frame_skip
        self.numSolverIterations = 5
        self.clean_everything()

    def clean_everything(self):
        """(Re)apply gravity, contact and stepping parameters to the simulation."""
        self._p.setGravity(0, 0, -self.gravity)
        self._p.setDefaultContactERP(0.9)
        # One fixed step of timestep*frame_skip seconds, internally split into
        # frame_skip substeps by bullet.
        self._p.setPhysicsEngineParameter(fixedTimeStep=self.timestep * self.frame_skip,
                                          numSolverIterations=self.numSolverIterations,
                                          numSubSteps=self.frame_skip)

    def step(self, frame_skip):
        # frame_skip is already baked into the engine via numSubSteps in
        # clean_everything(), so a single stepSimulation() covers the full step.
        self._p.stepSimulation()
79 |
80 |
81 |
--------------------------------------------------------------------------------
/pybulletgym/envs/mujoco/scenes/stadium.py:
--------------------------------------------------------------------------------
import os, inspect
# Make the parent package importable when this module is executed directly
# (outside the normal package import machinery).
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
os.sys.path.insert(0,parentdir)

from .scene_bases import Scene
import pybullet
8 |
9 |
class StadiumScene(Scene):
    """Scene with a stadium floor loaded once from an SDF file."""

    multiplayer = False
    zero_at_running_strip_start_line = True  # if False, (0, 0, 0) is the middle of the stadium
    stadium_halflen = 105 * 0.25   # FOOTBALL_FIELD_HALFLEN
    stadium_halfwidth = 50 * 0.25  # FOOTBALL_FIELD_HALFWID
    stadiumLoaded = 0

    def episode_restart(self, bullet_client):
        """Reset the scene; the stadium floor is loaded only on the first call."""
        self._p = bullet_client
        Scene.episode_restart(self, bullet_client)
        if self.stadiumLoaded:
            return
        self.stadiumLoaded = 1

        filename = os.path.join(os.path.dirname(__file__), "..", "assets", "scenes", "stadium", "plane_stadium.sdf")
        self.ground_plane_mjcf = self._p.loadSDF(filename)

        # Soften the floor a little and make it slightly transparent.
        for body in self.ground_plane_mjcf:
            self._p.changeDynamics(body, -1, lateralFriction=0.8, restitution=0.5)
            self._p.changeVisualShape(body, -1, rgbaColor=[1, 1, 1, 0.8])
            self._p.configureDebugVisualizer(pybullet.COV_ENABLE_PLANAR_REFLECTION, 1)
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/roboschool/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/roboschool/envs/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/env_bases.py:
--------------------------------------------------------------------------------
1 | import gym, gym.spaces, gym.utils, gym.utils.seeding
2 | import numpy as np
3 | import pybullet
4 | from pybullet_utils import bullet_client
5 |
6 | from pkg_resources import parse_version
7 |
8 |
class BaseBulletEnv(gym.Env):
    """
    Base class for Bullet physics simulation environments in a Scene.
    These environments create single-player scenes and behave like normal Gym environments, if
    you don't use multiplayer.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 60
    }

    def __init__(self, robot, render=False):
        """
        :param robot: robot object exposing action_space/observation_space,
                      reset(bullet_client) and calc_potential()
        :param render: if True, connect to bullet in GUI mode on first reset
        """
        self.scene = None
        self.physicsClientId = -1  # -1 means "no bullet connection yet"
        self.ownsPhysicsClient = 0
        self.camera = Camera()
        self.isRender = render
        self.robot = robot
        self._seed()
        # Default camera placement for rgb_array rendering.
        self._cam_dist = 3
        self._cam_yaw = 0
        self._cam_pitch = -30
        self._render_width = 320
        self._render_height = 240

        self.action_space = robot.action_space
        self.observation_space = robot.observation_space

    def configure(self, args):
        """Store extra arguments on the robot (gym configure hook)."""
        self.robot.args = args

    def _seed(self, seed=None):
        """Seed the env RNG and share the same generator with the robot."""
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        self.robot.np_random = self.np_random  # use the same np_randomizer for robot as for env
        return [seed]

    def _reset(self):
        """Connect to bullet if needed, (re)build the scene and reset the robot.

        :return: the robot's initial observation
        """
        if self.physicsClientId < 0:
            self.ownsPhysicsClient = True

            if self.isRender:
                self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
            else:
                self._p = bullet_client.BulletClient()

            self.physicsClientId = self._p._client
            self._p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)

        if self.scene is None:
            self.scene = self.create_single_player_scene(self._p)
        if not self.scene.multiplayer and self.ownsPhysicsClient:
            self.scene.episode_restart(self._p)

        self.robot.scene = self.scene

        self.frame = 0
        self.done = 0
        self.reward = 0
        # (removed unused local `dump = 0` that was never read)
        s = self.robot.reset(self._p)
        self.potential = self.robot.calc_potential()
        return s

    def _render(self, mode, close=False):
        """Render a frame; `close` is kept only for old-gym signature compatibility.

        mode == "human" merely flags GUI rendering for the next connection and
        returns an empty array; only "rgb_array" produces pixels.
        """
        if mode == "human":
            self.isRender = True
        if mode != "rgb_array":
            return np.array([])

        # Track the robot body if its position is known.
        base_pos = [0, 0, 0]
        if hasattr(self, 'robot'):
            if hasattr(self.robot, 'body_xyz'):
                base_pos = self.robot.body_xyz

        view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
            cameraTargetPosition=base_pos,
            distance=self._cam_dist,
            yaw=self._cam_yaw,
            pitch=self._cam_pitch,
            roll=0,
            upAxisIndex=2)
        proj_matrix = self._p.computeProjectionMatrixFOV(
            fov=60, aspect=float(self._render_width)/self._render_height,
            nearVal=0.1, farVal=100.0)
        (_, _, px, _, _) = self._p.getCameraImage(
            width=self._render_width, height=self._render_height, viewMatrix=view_matrix,
            projectionMatrix=proj_matrix,
            renderer=pybullet.ER_BULLET_HARDWARE_OPENGL
        )
        rgb_array = np.array(px)
        rgb_array = rgb_array[:, :, :3]  # drop the alpha channel
        return rgb_array

    def _close(self):
        """Disconnect from bullet if this env owns the connection."""
        if self.ownsPhysicsClient:
            if self.physicsClientId >= 0:
                self._p.disconnect()
        self.physicsClientId = -1

    def HUD(self, state, a, done):
        """Hook for drawing a heads-up display; no-op in the base class."""
        pass

    # backwards compatibility for gym >= v0.9.x
    # for extension of this class.
    def step(self, *args, **kwargs):
        return self._step(*args, **kwargs)

    if parse_version(gym.__version__) >= parse_version('0.9.6'):
        close = _close
        render = _render
        reset = _reset
        seed = _seed
122 |
123 |
class Camera:
    """Debug-visualizer camera helper.

    The original __init__ never set ``self._p``, so move_and_look_at() always
    raised AttributeError. The bullet client can now be injected at
    construction (or assigned later via ``camera._p = client``); the
    zero-argument form remains valid for backward compatibility.
    """

    def __init__(self, bullet_client=None):
        # Client used for resetDebugVisualizerCamera; None until injected.
        self._p = bullet_client

    def move_and_look_at(self, i, j, k, x, y, z):
        """Point the debug camera at (x, y, z).

        The i/j/k arguments are accepted for interface compatibility but are
        not used by this implementation (distance and yaw are fixed).
        """
        lookat = [x, y, z]
        distance = 10
        yaw = 10
        self._p.resetDebugVisualizerCamera(distance, yaw, -20, lookat)
133 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/locomotion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/roboschool/envs/locomotion/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/locomotion/ant_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.locomotion.walker_base_env import WalkerBaseBulletEnv
2 | from pybulletgym.envs.roboschool.robots.locomotors import Ant
3 |
4 |
class AntBulletEnv(WalkerBaseBulletEnv):
    """Walker environment driving the quadruped Ant robot."""

    def __init__(self):
        robot = Ant()
        self.robot = robot
        WalkerBaseBulletEnv.__init__(self, robot)
9 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/locomotion/atlas_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.locomotion.walker_base_env import WalkerBaseBulletEnv
2 | from pybulletgym.envs.roboschool.robots.locomotors import Atlas
3 | from pybulletgym.envs.roboschool.scenes import StadiumScene
4 |
5 |
class AtlasBulletEnv(WalkerBaseBulletEnv):
    """Walker environment driving the Atlas humanoid on a stadium floor."""

    def __init__(self):
        robot = Atlas()
        self.robot = robot
        WalkerBaseBulletEnv.__init__(self, robot)

    def create_single_player_scene(self, bullet_client):
        # Atlas integrates at a finer timestep: frame_skip 8 instead of the usual 4.
        self.stadium_scene = StadiumScene(bullet_client, gravity=9.8, timestep=0.0165 / 8, frame_skip=8)
        return self.stadium_scene

    def robot_specific_reset(self):
        # NOTE(review): Atlas.robot_specific_reset elsewhere in this package
        # takes a bullet_client argument — confirm this zero-argument call path
        # is ever exercised.
        self.robot.robot_specific_reset()
17 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/locomotion/half_cheetah_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.locomotion.walker_base_env import WalkerBaseBulletEnv
2 | from pybulletgym.envs.roboschool.robots.locomotors import HalfCheetah
3 |
4 |
class HalfCheetahBulletEnv(WalkerBaseBulletEnv):
    """Walker environment driving the planar HalfCheetah robot."""

    def __init__(self):
        robot = HalfCheetah()
        self.robot = robot
        WalkerBaseBulletEnv.__init__(self, robot)
9 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/locomotion/hopper_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.locomotion.walker_base_env import WalkerBaseBulletEnv
2 | from pybulletgym.envs.roboschool.robots.locomotors import Hopper
3 |
4 |
class HopperBulletEnv(WalkerBaseBulletEnv):
    """Walker environment driving the one-legged Hopper robot."""

    def __init__(self):
        robot = Hopper()
        self.robot = robot
        WalkerBaseBulletEnv.__init__(self, robot)
9 |
10 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/locomotion/humanoid_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.locomotion.walker_base_env import WalkerBaseBulletEnv
2 | from pybulletgym.envs.roboschool.robots.locomotors import Humanoid
3 |
4 |
class HumanoidBulletEnv(WalkerBaseBulletEnv):
    """Walker environment for a humanoid; actuation costs scaled up 4.25x."""

    def __init__(self, robot=None):
        # Allow subclasses (e.g. flagrun variants) to inject their own robot.
        self.robot = Humanoid() if robot is None else robot
        WalkerBaseBulletEnv.__init__(self, self.robot)
        self.electricity_cost = 4.25 * WalkerBaseBulletEnv.electricity_cost
        self.stall_torque_cost = 4.25 * WalkerBaseBulletEnv.stall_torque_cost
11 |
12 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/locomotion/humanoid_flagrun_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.locomotion.humanoid_env import HumanoidBulletEnv
2 | from pybulletgym.envs.roboschool.robots.locomotors import HumanoidFlagrun, HumanoidFlagrunHarder
3 |
4 |
class HumanoidFlagrunBulletEnv(HumanoidBulletEnv):
    """Humanoid that runs toward randomly placed flags."""

    random_yaw = True

    def __init__(self):
        robot = HumanoidFlagrun()
        self.robot = robot
        HumanoidBulletEnv.__init__(self, robot)

    def create_single_player_scene(self, bullet_client):
        scene = HumanoidBulletEnv.create_single_player_scene(self, bullet_client)
        # Flag positions are relative to the stadium center, not the start line.
        scene.zero_at_running_strip_start_line = False
        return scene
16 |
17 |
class HumanoidFlagrunHarderBulletEnv(HumanoidBulletEnv):
    """Flagrun variant that can start leaning/fallen and must stand up."""

    random_lean = True  # can fall on start

    def __init__(self):
        self.robot = HumanoidFlagrunHarder()
        HumanoidBulletEnv.__init__(self, self.robot)
        # Don't care that much about electricity, just stand up!
        # BUGFIX: this must run AFTER the parent __init__, which assigns
        # self.electricity_cost itself; the original divided before the call,
        # so the intended discount was silently overwritten.
        self.electricity_cost /= 4

    def create_single_player_scene(self, bullet_client):
        s = HumanoidBulletEnv.create_single_player_scene(self, bullet_client)
        # Flag positions are relative to the stadium center, not the start line.
        s.zero_at_running_strip_start_line = False
        return s
30 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/locomotion/walker2d_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.locomotion.walker_base_env import WalkerBaseBulletEnv
2 | from pybulletgym.envs.roboschool.robots.locomotors import Walker2D
3 |
4 |
class Walker2DBulletEnv(WalkerBaseBulletEnv):
    """Walker environment driving the bipedal Walker2D robot."""

    def __init__(self):
        robot = Walker2D()
        self.robot = robot
        WalkerBaseBulletEnv.__init__(self, robot)
9 |
10 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/manipulation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/roboschool/envs/manipulation/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/manipulation/pusher_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv
2 | from pybulletgym.envs.roboschool.robots.manipulators.pusher import Pusher
3 | from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene
4 | import numpy as np
5 |
6 |
class PusherBulletEnv(BaseBulletEnv):
    """Pusher arm task: push an object to a target location."""

    def __init__(self):
        self.robot = Pusher()
        BaseBulletEnv.__init__(self, self.robot)

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client, gravity=9.81, timestep=0.0020, frame_skip=5)

    def step(self, a):
        """Apply action a, advance physics and return (state, reward, done, info)."""
        self.robot.apply_action(a)
        self.scene.global_step()

        state = self.robot.calc_state()  # sets self.to_target_vec

        potential_old = self.potential
        self.potential = self.robot.calc_potential()

        joint_vel = np.array([
            self.robot.shoulder_pan_joint.get_velocity(),
            self.robot.shoulder_lift_joint.get_velocity(),
            self.robot.upper_arm_roll_joint.get_velocity(),
            self.robot.elbow_flex_joint.get_velocity(),
            self.robot.forearm_roll_joint.get_velocity(),
            self.robot.wrist_flex_joint.get_velocity(),
            self.robot.wrist_roll_joint.get_velocity()
        ])

        action_product = np.matmul(np.abs(a), np.abs(joint_vel))
        # BUGFIX: stall torque should cost energy regardless of sign; the
        # original summed the raw actions, rewarding negative torques.
        action_sum = np.abs(a).sum()

        electricity_cost = (
            -0.10 * action_product  # work torque*angular_velocity
            - 0.01 * action_sum  # stall torque require some energy
        )

        # Penalize joints sitting at their limit (|relative position| near 1).
        # BUGFIX: the original test `abs(pos) - 1 < 0.01` held for every joint
        # at every step; this matches the check used by ReacherBulletEnv.
        stuck_joint_cost = 0
        for j in self.robot.ordered_joints:
            if np.abs(np.abs(j.current_relative_position()[0]) - 1) < 0.01:
                stuck_joint_cost += -0.1

        self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost)]
        self.HUD(state, a, False)
        return state, sum(self.rewards), False, {}

    def calc_potential(self):
        # NOTE(review): self.to_target_vec is set on the robot, not this env;
        # reset/step call self.robot.calc_potential, so this looks like dead code.
        return -100 * np.linalg.norm(self.to_target_vec)

    def camera_adjust(self):
        """Follow the fingertip with a fixed-offset camera."""
        x, y, z = self.robot.fingertip.pose().xyz()
        x *= 0.5
        y *= 0.5
        self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
59 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/manipulation/reacher_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv
2 | from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene
3 | import numpy as np
4 | from pybulletgym.envs.roboschool.robots.manipulators.reacher import Reacher
5 |
6 |
class ReacherBulletEnv(BaseBulletEnv):
    """Two-joint reacher: move the fingertip to a target."""

    def __init__(self):
        self.robot = Reacher()
        BaseBulletEnv.__init__(self, self.robot)

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0165, frame_skip=1)

    def step(self, a):
        """Apply action, advance the scene and compute the shaped reward."""
        assert not self.scene.multiplayer
        self.robot.apply_action(a)
        self.scene.global_step()

        state = self.robot.calc_state()  # sets self.to_target_vec

        potential_old = self.potential
        self.potential = self.robot.calc_potential()

        # work (torque * angular velocity) plus a small stall-torque term
        work_cost = np.abs(a[0] * self.robot.theta_dot) + np.abs(a[1] * self.robot.gamma_dot)
        stall_cost = np.abs(a[0]) + np.abs(a[1])
        electricity_cost = -0.10 * work_cost - 0.01 * stall_cost

        # penalize the gamma joint sitting at its limit (|relative position| ~ 1)
        stuck_joint_cost = -0.1 if np.abs(np.abs(self.robot.gamma) - 1) < 0.01 else 0.0

        self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost)]
        self.HUD(state, a, False)
        return state, sum(self.rewards), False, {}

    def camera_adjust(self):
        """Follow the fingertip with a fixed-offset camera."""
        x, y, z = self.robot.fingertip.pose().xyz()
        self.camera.move_and_look_at(0.3, 0.3, 0.3, 0.5 * x, 0.5 * y, z)
39 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/manipulation/striker_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv
2 | from pybulletgym.envs.roboschool.robots.manipulators.striker import Striker
3 | from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene
4 | import numpy as np
5 |
6 |
class StrikerBulletEnv(BaseBulletEnv):
    """Striker arm task: strike an object so it slides onto a target."""

    def __init__(self):
        self.robot = Striker()
        BaseBulletEnv.__init__(self, self.robot)
        self._striked = False            # whether the fingertip already hit the object
        self._min_strike_dist = np.inf   # best object-to-target distance seen so far
        self.strike_threshold = 0.1      # fingertip-object distance that counts as a strike

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client, gravity=9.81, timestep=0.0020, frame_skip=5)

    def step(self, a):
        """Apply action a, advance physics and return (state, reward, done, info)."""
        self.robot.apply_action(a)
        self.scene.global_step()

        state = self.robot.calc_state()  # sets self.to_target_vec

        potential_old = self.potential
        self.potential = self.robot.calc_potential()

        joint_vel = np.array([
            self.robot.shoulder_pan_joint.get_velocity(),
            self.robot.shoulder_lift_joint.get_velocity(),
            self.robot.upper_arm_roll_joint.get_velocity(),
            self.robot.elbow_flex_joint.get_velocity(),
            self.robot.forearm_roll_joint.get_velocity(),
            self.robot.wrist_flex_joint.get_velocity(),
            self.robot.wrist_roll_joint.get_velocity()
        ])

        action_product = np.matmul(np.abs(a), np.abs(joint_vel))
        # BUGFIX: stall torque should cost energy regardless of sign; the
        # original summed the raw actions, rewarding negative torques.
        action_sum = np.abs(a).sum()

        electricity_cost = (
            -0.10 * action_product  # work torque*angular_velocity
            - 0.01 * action_sum  # stall torque require some energy
        )

        # Penalize joints sitting at their limit (|relative position| near 1).
        # BUGFIX: the original test `abs(pos) - 1 < 0.01` held for every joint
        # at every step; this matches the check used by ReacherBulletEnv.
        stuck_joint_cost = 0
        for j in self.robot.ordered_joints:
            if np.abs(np.abs(j.current_relative_position()[0]) - 1) < 0.01:
                stuck_joint_cost += -0.1

        dist_object_finger = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz()
        reward_dist_vec = self.robot.object.pose().xyz() - self.robot.target.pose().xyz()  # TODO: Should the object and target really belong to the robot? Maybe split this off

        self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(reward_dist_vec))

        if np.linalg.norm(dist_object_finger) < self.strike_threshold:
            self._striked = True
            self._strike_pos = self.robot.fingertip.pose().xyz()

        # After the strike, measure nearness to where the strike happened so
        # the arm is not rewarded for chasing the sliding object.
        if self._striked:
            reward_near_vec = self.robot.object.pose().xyz() - self._strike_pos
        else:
            reward_near_vec = self.robot.object.pose().xyz() - self.robot.fingertip.pose().xyz()

        reward_near = - np.linalg.norm(reward_near_vec)

        reward_dist = - np.linalg.norm(self._min_strike_dist)
        reward_ctrl = - np.square(a).sum()
        self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost),
                        3 * reward_dist, 0.1 * reward_ctrl, 0.5 * reward_near]
        self.HUD(state, a, False)
        return state, sum(self.rewards), False, {}

    def calc_potential(self):
        # NOTE(review): self.to_target_vec is set on the robot, not this env;
        # reset/step call self.robot.calc_potential, so this looks like dead code.
        return -100 * np.linalg.norm(self.to_target_vec)

    def camera_adjust(self):
        """Follow the fingertip with a fixed-offset camera."""
        x, y, z = self.robot.fingertip.pose().xyz()
        x *= 0.5
        y *= 0.5
        self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
81 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/manipulation/thrower_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv
2 | from pybulletgym.envs.roboschool.robots.manipulators.thrower import Thrower
3 | from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene
4 | import numpy as np
5 |
6 |
class ThrowerBulletEnv(BaseBulletEnv):
    """Thrower arm task: toss an object so that it lands on a target."""

    def __init__(self):
        self.robot = Thrower()
        BaseBulletEnv.__init__(self, self.robot)

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client, gravity=0.0, timestep=0.0020, frame_skip=5)

    def step(self, a):
        """Apply action a, advance physics and return (state, reward, done, info)."""
        self.robot.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.to_target_vec

        potential_old = self.potential
        self.potential = self.robot.calc_potential()

        joint_vel = np.array([
            self.robot.shoulder_pan_joint.get_velocity(),
            self.robot.shoulder_lift_joint.get_velocity(),
            self.robot.upper_arm_roll_joint.get_velocity(),
            self.robot.elbow_flex_joint.get_velocity(),
            self.robot.forearm_roll_joint.get_velocity(),
            self.robot.wrist_flex_joint.get_velocity(),
            self.robot.wrist_roll_joint.get_velocity()
        ])

        action_product = np.matmul(np.abs(a), np.abs(joint_vel))
        # BUGFIX: stall torque should cost energy regardless of sign; the
        # original summed the raw actions, rewarding negative torques.
        action_sum = np.abs(a).sum()

        electricity_cost = (
            -0.10 * action_product  # work torque*angular_velocity
            - 0.01 * action_sum  # stall torque require some energy
        )

        # Penalize joints sitting at their limit (|relative position| near 1).
        # BUGFIX: the original test `abs(pos) - 1 < 0.01` held for every joint
        # at every step; this matches the check used by ReacherBulletEnv.
        stuck_joint_cost = 0
        for j in self.robot.ordered_joints:
            if np.abs(np.abs(j.current_relative_position()[0]) - 1) < 0.01:
                stuck_joint_cost += -0.1

        object_xy = self.robot.object.pose().xyz()[:2]
        target_xy = self.robot.target.pose().xyz()[:2]

        # Once the object drops below z = -0.25, freeze its landing location.
        if not self.robot._object_hit_ground and self.robot.object.pose().xyz()[2] < -0.25:  # TODO: Should the object and target really belong to the robot? Maybe split this off
            self.robot._object_hit_ground = True
            self.robot._object_hit_location = self.robot.object.pose().xyz()

        if self.robot._object_hit_ground:
            object_hit_xy = self.robot._object_hit_location[:2]
            reward_dist = -np.linalg.norm(object_hit_xy - target_xy)
        else:
            reward_dist = -np.linalg.norm(object_xy - target_xy)
        reward_ctrl = - np.square(a).sum()

        self.rewards = [float(self.potential - potential_old), float(electricity_cost), float(stuck_joint_cost),
                        reward_dist, 0.002 * reward_ctrl]
        self.HUD(state, a, False)
        return state, sum(self.rewards), False, {}

    def camera_adjust(self):
        """Follow the fingertip with a fixed-offset camera."""
        x, y, z = self.robot.fingertip.pose().xyz()
        x *= 0.5
        y *= 0.5
        self.camera.move_and_look_at(0.3, 0.3, 0.3, x, y, z)
70 |
71 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/pendulum/__init__.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.pendulum.inverted_pendulum_env import InvertedPendulumBulletEnv, InvertedPendulumSwingupBulletEnv
2 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/pendulum/inverted_double_pendulum_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv
2 | from pybulletgym.envs.roboschool.robots.pendula.inverted_double_pendulum import InvertedDoublePendulum
3 | from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene
4 |
5 |
class InvertedDoublePendulumBulletEnv(BaseBulletEnv):
    """Cart with a two-link pole that must be kept upright."""

    def __init__(self):
        self.robot = InvertedDoublePendulum()
        BaseBulletEnv.__init__(self, self.robot)
        self.stateId = -1  # cached bullet state id for fast resets

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1)

    def reset(self):
        """Reset via a saved bullet state when available (faster than reloading)."""
        if self.stateId >= 0:
            self._p.restoreState(self.stateId)
        r = BaseBulletEnv._reset(self)
        if self.stateId < 0:
            self.stateId = self._p.saveState()
        return r

    def step(self, a):
        """Apply action, advance physics and compute the MuJoCo-style reward."""
        self.robot.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.pos_x self.pos_y
        # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9
        # using tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3
        dist_penalty = 0.01 * self.robot.pos_x ** 2 + (self.robot.pos_y + 0.3 - 2) ** 2
        # velocity penalty disabled until joint velocities are exposed,
        # see https://github.com/bulletphysics/bullet3/issues/1040
        vel_penalty = 0
        alive_bonus = 10
        done = self.robot.pos_y + 0.3 <= 1
        self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)]
        self.HUD(state, a, done)
        return state, sum(self.rewards), done, {}

    def camera_adjust(self):
        self.camera.move_and_look_at(0, 1.2, 1.2, 0, 0, 0.5)
41 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/envs/pendulum/inverted_pendulum_env.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.envs.env_bases import BaseBulletEnv
2 | from pybulletgym.envs.roboschool.robots.pendula.interted_pendulum import InvertedPendulum, InvertedPendulumSwingup
3 | from pybulletgym.envs.roboschool.scenes.scene_bases import SingleRobotEmptyScene
4 | import numpy as np
5 |
6 |
class InvertedPendulumBulletEnv(BaseBulletEnv):
    """Classic cart-pole balancing task on a Bullet backend."""

    def __init__(self):
        self.robot = InvertedPendulum()
        BaseBulletEnv.__init__(self, self.robot)
        self.stateId = -1  # cached bullet state id for fast resets

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1)

    def reset(self):
        """Reset via a saved bullet state when available (faster than reloading)."""
        if self.stateId >= 0:
            self._p.restoreState(self.stateId)
        r = BaseBulletEnv._reset(self)
        if self.stateId < 0:
            self.stateId = self._p.saveState()
        return r

    def step(self, a):
        """Apply action and compute the reward.

        Swingup variant: reward is cos(theta), never terminates.
        Plain variant: reward 1.0 per step, terminates once the pole leans
        more than 0.2 rad. (Removed unused local `vel_penalty`.)
        """
        self.robot.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.pos_x self.pos_y
        if self.robot.swingup:
            reward = np.cos(self.robot.theta)
            done = False
        else:
            reward = 1.0
            done = np.abs(self.robot.theta) > .2
        self.rewards = [float(reward)]
        self.HUD(state, a, done)
        return state, sum(self.rewards), done, {}

    def camera_adjust(self):
        self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)
43 |
44 |
class InvertedPendulumSwingupBulletEnv(InvertedPendulumBulletEnv):
    """Swing-up variant: uses the InvertedPendulumSwingup robot instead."""

    def __init__(self):
        robot = InvertedPendulumSwingup()
        self.robot = robot
        BaseBulletEnv.__init__(self, robot)
        self.stateId = -1  # cached bullet state id for fast resets
50 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/roboschool/robots/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/locomotors/__init__.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.locomotors.ant import Ant
2 | from pybulletgym.envs.roboschool.robots.locomotors.atlas import Atlas
3 | from pybulletgym.envs.roboschool.robots.locomotors.half_cheetah import HalfCheetah
4 | from pybulletgym.envs.roboschool.robots.locomotors.hopper import Hopper
5 | from pybulletgym.envs.roboschool.robots.locomotors.humanoid import Humanoid
6 | from pybulletgym.envs.roboschool.robots.locomotors.humanoid_flagrun import HumanoidFlagrun, HumanoidFlagrunHarder
7 | from pybulletgym.envs.roboschool.robots.locomotors.walker2d import Walker2D
8 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/locomotors/ant.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.locomotors.walker_base import WalkerBase
2 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
3 |
4 |
class Ant(WalkerBase, MJCFBasedRobot):
    """Quadruped ant robot loaded from ant.xml."""

    foot_list = ['front_left_foot', 'front_right_foot', 'left_back_foot', 'right_back_foot']

    def __init__(self):
        WalkerBase.__init__(self, power=2.5)
        MJCFBasedRobot.__init__(self, "ant.xml", "torso", action_dim=8, obs_dim=28)

    def alive_bonus(self, z, pitch):
        # The central sphere has radius 0.25; die as soon as it scrapes the ground.
        return +1 if z > 0.26 else -1
14 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/locomotors/atlas.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.locomotors.walker_base import WalkerBase
2 | from pybulletgym.envs.roboschool.robots.robot_bases import URDFBasedRobot
3 | import numpy as np
4 | import pybullet as p
5 |
6 |
class Atlas(WalkerBase, URDFBasedRobot):
    """Atlas humanoid robot (URDF-based) with knee-usage reward shaping."""

    random_yaw = False  # if True, set_initial_orientation randomizes the spawn heading
    foot_list = ["r_foot", "l_foot"]  # feet tracked for ground contact

    def __init__(self):
        WalkerBase.__init__(self, power=2.9)
        URDFBasedRobot.__init__(self, "atlas/atlas_description/atlas_v4_with_multisense.urdf", "pelvis", action_dim=30, obs_dim=70)

    def alive_bonus(self, z, pitch):
        """Reward for staying upright, shaped to discourage walking with locked knees.

        Note: the `z` parameter is deliberately shadowed below by the head
        height — aliveness is judged from the head pose, not the argument.
        Relies on self.head being cached by robot_specific_reset().
        """
        # This is debug code to fix unwanted self-collisions:
        #for part in self.parts.values():
        #    contact_names = set(x.name for x in part.contact_list())
        #    if contact_names:
        #        print("CONTACT OF '%s' WITH '%s'" % (part.name, ",".join(contact_names)) )

        x, y, z = self.head.pose().xyz()
        # Failure mode: robot doesn't bend knees, tries to walk using hips.
        # We fix that by a bit of reward engineering.
        knees = np.array([j.current_relative_position() for j in [self.jdict["l_leg_kny"], self.jdict["r_leg_kny"]]], dtype=np.float32).flatten()
        # Every other entry of the flattened pairs is the relative position
        # (presumably (position, velocity) pairs — TODO confirm in robot_bases);
        # count knees pinned near a joint limit.
        knees_at_limit = np.count_nonzero(np.abs(knees[0::2]) > 0.99)
        return +4-knees_at_limit if z > 1.3 else -1

    def robot_specific_reset(self, bullet_client):
        """Reset the walker pose and cache the head part used by alive_bonus()."""
        WalkerBase.robot_specific_reset(self, bullet_client)
        self.set_initial_orientation(yaw_center=0, yaw_random_spread=np.pi)
        self.head = self.parts["head"]

    def set_initial_orientation(self, yaw_center, yaw_random_spread):
        """Place the robot at its start position facing the requested yaw.

        :param yaw_center: mean heading in radians
        :param yaw_random_spread: max +/- deviation, used only if random_yaw
        """
        if not self.random_yaw:
            yaw = yaw_center
        else:
            yaw = yaw_center + self.np_random.uniform(low=-yaw_random_spread, high=yaw_random_spread)

        position = [self.start_pos_x, self.start_pos_y, self.start_pos_z + 1.0]
        orientation = [0, 0, yaw]  # just face random direction, but stay straight otherwise
        self.robot_body.reset_pose(position, p.getQuaternionFromEuler(orientation))
        self.initial_z = 1.5
44 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/locomotors/half_cheetah.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.locomotors.walker_base import WalkerBase
2 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
3 | import numpy as np
4 |
5 |
class HalfCheetah(WalkerBase, MJCFBasedRobot):
    """Planar cheetah; episode terminates when it leans on shins or thighs."""
    # track these contacts with ground
    foot_list = ["ffoot", "fshin", "fthigh", "bfoot", "bshin", "bthigh"]

    def __init__(self):
        WalkerBase.__init__(self, power=0.90)
        MJCFBasedRobot.__init__(self, "half_cheetah.xml", "torso", action_dim=6, obs_dim=26)

    def alive_bonus(self, z, pitch):
        # Use contact other than feet to terminate episode: due to a lot of strange walks using knees.
        # Indices 1, 2, 4, 5 are the shin/thigh entries of foot_list above.
        upright = np.abs(pitch) < 1.0
        legs_clear = not (self.feet_contact[1] or self.feet_contact[2]
                          or self.feet_contact[4] or self.feet_contact[5])
        return +1 if upright and legs_clear else -1

    def robot_specific_reset(self, bullet_client):
        WalkerBase.robot_specific_reset(self, bullet_client)
        # Per-joint torque scaling (multiplied by self.power in WalkerBase.apply_action).
        for joint_name, coef in (("bthigh", 120.0), ("bshin", 90.0), ("bfoot", 60.0),
                                 ("fthigh", 140.0), ("fshin", 60.0), ("ffoot", 30.0)):
            self.jdict[joint_name].power_coef = coef
25 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/locomotors/hopper.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.locomotors.walker_base import WalkerBase
2 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
3 |
4 |
class Hopper(WalkerBase, MJCFBasedRobot):
    """One-legged hopper; alive while tall enough and roughly upright."""
    foot_list = ["foot"]

    def __init__(self):
        WalkerBase.__init__(self, power=0.75)
        MJCFBasedRobot.__init__(self, "hopper.xml", "torso", action_dim=3, obs_dim=15)

    def alive_bonus(self, z, pitch):
        # Alive while the body stays above 0.8 m and is not pitched over.
        alive = z > 0.8 and abs(pitch) < 1.0
        return +1 if alive else -1
14 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/locomotors/humanoid.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.locomotors.walker_base import WalkerBase
2 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
3 | import numpy as np
4 |
5 |
class Humanoid(WalkerBase, MJCFBasedRobot):
    """17-joint symmetric humanoid; can optionally spawn with random yaw/lean."""
    self_collision = True
    foot_list = ["right_foot", "left_foot"]  # "left_hand", "right_hand"

    def __init__(self, random_yaw=False, random_lean=False):
        WalkerBase.__init__(self, power=0.41)
        MJCFBasedRobot.__init__(self, 'humanoid_symmetric.xml', 'torso', action_dim=17, obs_dim=44)
        # 17 joints, 4 of them important for walking (hip, knee), others may as well be turned off, 17/4 = 4.25
        self.random_yaw = random_yaw
        self.random_lean = random_lean

    def robot_specific_reset(self, bullet_client):
        """Reset joints, build the motor/power lists and optionally randomize the spawn pose."""
        WalkerBase.robot_specific_reset(self, bullet_client)
        self.motor_names = ["abdomen_z", "abdomen_y", "abdomen_x"]
        self.motor_power = [100, 100, 100]
        self.motor_names += ["right_hip_x", "right_hip_z", "right_hip_y", "right_knee"]
        self.motor_power += [100, 100, 300, 200]
        self.motor_names += ["left_hip_x", "left_hip_z", "left_hip_y", "left_knee"]
        self.motor_power += [100, 100, 300, 200]
        self.motor_names += ["right_shoulder1", "right_shoulder2", "right_elbow"]
        self.motor_power += [75, 75, 75]
        self.motor_names += ["left_shoulder1", "left_shoulder2", "left_elbow"]
        self.motor_power += [75, 75, 75]
        self.motors = [self.jdict[n] for n in self.motor_names]
        if self.random_yaw:
            position = [0, 0, 0]
            orientation = [0, 0, 0]
            yaw = self.np_random.uniform(low=-3.14, high=3.14)
            if self.random_lean and self.np_random.randint(2) == 0:
                # Start lying down (face-up or face-down) half the time.
                if self.np_random.randint(2) == 0:
                    pitch = np.pi/2
                    position = [0, 0, 0.45]
                else:
                    pitch = np.pi*3/2
                    position = [0, 0, 0.25]
                roll = 0
                orientation = [roll, pitch, yaw]
            else:
                position = [0, 0, 1.4]
                orientation = [0, 0, yaw]  # just face random direction, but stay straight otherwise
            self.robot_body.reset_position(position)
            # BUGFIX: this module never imports pybullet, so the original call to
            # p.getQuaternionFromEuler raised NameError whenever random_yaw was set.
            # Use the bullet client stored by WalkerBase.robot_specific_reset instead.
            self.robot_body.reset_orientation(self._p.getQuaternionFromEuler(orientation))
        self.initial_z = 0.8

    def apply_action(self, a):
        """Apply clipped per-motor torques scaled by each motor's power and self.power."""
        assert(np.isfinite(a).all())
        force_gain = 1
        for i, m, power in zip(range(17), self.motors, self.motor_power):
            m.set_motor_torque(float(force_gain * power * self.power * np.clip(a[i], -1, +1)))

    def alive_bonus(self, z, pitch):
        # 2 here because 17 joints produce a lot of electricity cost just from policy noise, living must be better than dying
        return +2 if z > 0.78 else -1
58 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/locomotors/walker2d.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.locomotors.walker_base import WalkerBase
2 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
3 |
4 |
class Walker2D(WalkerBase, MJCFBasedRobot):
    """Planar biped with two legs."""
    foot_list = ["foot", "foot_left"]

    def __init__(self):
        WalkerBase.__init__(self, power=0.40)
        MJCFBasedRobot.__init__(self, "walker2d.xml", "torso", action_dim=6, obs_dim=22)

    def alive_bonus(self, z, pitch):
        # Alive while the torso is above 0.8 m and not pitched over.
        alive = z > 0.8 and abs(pitch) < 1.0
        return +1 if alive else -1

    def robot_specific_reset(self, bullet_client):
        WalkerBase.robot_specific_reset(self, bullet_client)
        # Both ankles get the same torque scaling.
        for joint_name in ("foot_joint", "foot_left_joint"):
            self.jdict[joint_name].power_coef = 30.0
19 |
20 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/locomotors/walker_base.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.robot_bases import XmlBasedRobot
2 | import numpy as np
3 |
4 |
class WalkerBase(XmlBasedRobot):
    """Shared base for the legged locomotion robots (Hopper, Walker2D, HalfCheetah,
    Humanoid, Atlas).

    Provides the common reset, torque-based action application, observation
    assembly and the distance-to-target potential used for reward shaping.
    """

    def __init__(self, power):
        # Global torque scale; each joint applies power * joint.power_coef * action.
        self.power = power
        self.camera_x = 0
        self.start_pos_x, self.start_pos_y, self.start_pos_z = 0, 0, 0
        self.walk_target_x = 1e3  # kilometer away
        self.walk_target_y = 0
        self.body_xyz = [0, 0, 0]

    def robot_specific_reset(self, bullet_client):
        """Slightly randomize all joint positions and reset foot-contact bookkeeping."""
        self._p = bullet_client
        for j in self.ordered_joints:
            j.reset_current_position(self.np_random.uniform(low=-0.1, high=0.1), 0)

        self.feet = [self.parts[f] for f in self.foot_list]
        self.feet_contact = np.array([0.0 for f in self.foot_list], dtype=np.float32)
        try:
            # Best effort: self.scene (or its actor_introduce) may not exist yet.
            self.scene.actor_introduce(self)
        except AttributeError:
            pass
        # Captured lazily on the first calc_state() call.
        self.initial_z = None

    def apply_action(self, a):
        """Apply the clipped action vector `a` as motor torques on all joints."""
        assert (np.isfinite(a).all())
        for n, j in enumerate(self.ordered_joints):
            j.set_motor_torque(self.power * j.power_coef * float(np.clip(a[n], -1, +1)))

    def calc_state(self):
        """Assemble the observation vector and update pose/target bookkeeping.

        Returns [height delta, sin/cos of angle to target, scaled body velocity,
        roll, pitch] + joint position/speed pairs + foot contacts, clipped to [-5, 5].
        Also sets joint_speeds, joints_at_limit, body_xyz, body_rpy,
        walk_target_theta and walk_target_dist for use by the env's reward code.
        """
        j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten()
        # even elements [0::2] position, scaled to -1..+1 between limits
        # odd elements [1::2] angular speed, scaled to show -1..+1
        self.joint_speeds = j[1::2]
        self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99)

        body_pose = self.robot_body.pose()
        parts_xyz = np.array([p.pose().xyz() for p in self.parts.values()]).flatten()
        self.body_xyz = (
            parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2])  # torso z is more informative than mean z
        self.body_rpy = body_pose.rpy()
        z = self.body_xyz[2]
        if self.initial_z is None:
            self.initial_z = z
        # NOTE: `p` here is pitch; it shadows the comprehension variable above.
        r, p, yaw = self.body_rpy
        self.walk_target_theta = np.arctan2(self.walk_target_y - self.body_xyz[1],
                                            self.walk_target_x - self.body_xyz[0])
        self.walk_target_dist = np.linalg.norm(
            [self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0]])
        angle_to_target = self.walk_target_theta - yaw

        # Rotation about z by -yaw, used to express world-frame speed in the body frame.
        rot_speed = np.array(
            [[np.cos(-yaw), -np.sin(-yaw), 0],
             [np.sin(-yaw), np.cos(-yaw), 0],
             [		0,			 0, 1]]
        )
        vx, vy, vz = np.dot(rot_speed, self.robot_body.speed())  # rotate speed back to body point of view

        more = np.array([z-self.initial_z,
                         np.sin(angle_to_target), np.cos(angle_to_target),
                         0.3 * vx, 0.3 * vy, 0.3 * vz,  # 0.3 is just scaling typical speed into -1..+1, no physical sense here
                         r, p], dtype=np.float32)
        return np.clip(np.concatenate([more] + [j] + [self.feet_contact]), -5, +5)

    def calc_potential(self):
        # progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second),
        # all rewards have rew/frame units and close to 1.0
        try:
            debugmode = 0
            if debugmode:
                print("calc_potential: self.walk_target_dist")
                print(self.walk_target_dist)
                print("self.scene.dt")
                print(self.scene.dt)
                print("self.scene.frame_skip")
                print(self.scene.frame_skip)
                print("self.scene.timestep")
                print(self.scene.timestep)
            return - self.walk_target_dist / self.scene.dt
        except AttributeError:
            # Scene not attached yet: fall back to the raw (unscaled) distance.
            return - self.walk_target_dist
84 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/manipulators/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/roboschool/robots/manipulators/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/manipulators/pusher.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
2 | import numpy as np
3 |
4 |
class Pusher(MJCFBasedRobot):
    """7-DOF arm that must push an object toward a randomly placed goal."""
    # Sampling bounds (meters) for the random goal and object placement at reset.
    min_target_placement_radius = 0.5
    max_target_placement_radius = 0.8
    min_object_to_target_distance = 0.1
    max_object_to_target_distance = 0.4

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'pusher.xml', 'body0', action_dim=7, obs_dim=55)

    def robot_specific_reset(self, bullet_client):
        """Cache part/joint handles, randomize goal and object positions, and
        randomize the arm's joint angles."""
        # parts
        self.fingertip = self.parts["tips_arm"]
        self.target = self.parts["goal"]
        self.object = self.parts["object"]

        # joints
        self.shoulder_pan_joint = self.jdict["r_shoulder_pan_joint"]
        self.shoulder_lift_joint = self.jdict["r_shoulder_lift_joint"]
        self.upper_arm_roll_joint = self.jdict["r_upper_arm_roll_joint"]
        self.elbow_flex_joint = self.jdict["r_elbow_flex_joint"]
        self.forearm_roll_joint = self.jdict["r_forearm_roll_joint"]
        self.wrist_flex_joint = self.jdict["r_wrist_flex_joint"]
        self.wrist_roll_joint = self.jdict["r_wrist_roll_joint"]

        # Random 2D direction for the goal.
        self.target_pos = np.concatenate([
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1)
        ])

        # make length of vector between min and max_target_placement_radius
        self.target_pos = self.target_pos \
                          / np.linalg.norm(self.target_pos) \
                          * self.np_random.uniform(low=self.min_target_placement_radius,
                                                   high=self.max_target_placement_radius, size=1)

        self.object_pos = np.concatenate([
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1)
        ])

        # make length of vector between min and max_object_to_target_distance
        # NOTE(review): this normalizes by |object - target| but rescales the object
        # position vector itself, so the resulting object-to-target distance is not
        # actually constrained to the stated range — confirm whether this is intended.
        self.object_pos = self.object_pos \
                          / np.linalg.norm(self.object_pos - self.target_pos) \
                          * self.np_random.uniform(low=self.min_object_to_target_distance,
                                                   high=self.max_object_to_target_distance, size=1)

        # set position of objects (zero_offset recenters into the slide joints' frame)
        self.zero_offset = np.array([0.45, 0.55])
        self.jdict["goal_slidex"].reset_current_position(self.target_pos[0] - self.zero_offset[0], 0)
        self.jdict["goal_slidey"].reset_current_position(self.target_pos[1] - self.zero_offset[1], 0)
        self.jdict["obj_slidex"].reset_current_position(self.object_pos[0] - self.zero_offset[0], 0)
        self.jdict["obj_slidey"].reset_current_position(self.object_pos[1] - self.zero_offset[1], 0)

        # randomize all joints TODO: Will this work or do we have to constrain this resetting in some way?
        self.shoulder_pan_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.shoulder_lift_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.elbow_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.forearm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.wrist_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.wrist_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)

    def apply_action(self, a):
        """Apply small clipped torques (scale 0.05) to the seven arm joints."""
        assert (np.isfinite(a).all())
        self.shoulder_pan_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1)))
        self.shoulder_lift_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1)))
        self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[2], -1, +1)))
        self.elbow_flex_joint.set_motor_torque(0.05 * float(np.clip(a[3], -1, +1)))
        self.forearm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[4], -1, +1)))
        self.wrist_flex_joint.set_motor_torque(0.05 * float(np.clip(a[5], -1, +1)))
        self.wrist_roll_joint.set_motor_torque(0.05 * float(np.clip(a[6], -1, +1)))

    def calc_state(self):
        """Observation: joint states, object-to-target vector and key part positions."""
        self.to_target_vec = self.target_pos - self.object_pos
        return np.concatenate([
            np.array([j.current_position() for j in self.ordered_joints]).flatten(),  # all positions
            np.array([j.current_relative_position() for j in self.ordered_joints]).flatten(),  # scaled positions and speeds (pos/vel pairs)
            self.to_target_vec,
            self.fingertip.pose().xyz(),
            self.object.pose().xyz(),
            self.target.pose().xyz(),
        ])
87 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/manipulators/reacher.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
2 | import numpy as np
3 |
4 |
class Reacher(MJCFBasedRobot):
    """Two-link planar arm that must reach a randomly placed target."""
    TARG_LIMIT = 0.27

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'reacher.xml', 'body0', action_dim=2, obs_dim=9)

    def robot_specific_reset(self, bullet_client):
        # Scatter the target inside the reachable square (draw x first, then y,
        # to keep the RNG sequence identical to the original implementation).
        for axis in ("target_x", "target_y"):
            self.jdict[axis].reset_current_position(
                self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT), 0)
        self.fingertip = self.parts["fingertip"]
        self.target = self.parts["target"]
        self.central_joint = self.jdict["joint0"]
        self.elbow_joint = self.jdict["joint1"]
        # Randomize both arm joints over a full revolution.
        for joint in (self.central_joint, self.elbow_joint):
            joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)

    def apply_action(self, a):
        assert (np.isfinite(a).all())
        central_torque = 0.05 * float(np.clip(a[0], -1, +1))
        elbow_torque = 0.05 * float(np.clip(a[1], -1, +1))
        self.central_joint.set_motor_torque(central_torque)
        self.elbow_joint.set_motor_torque(elbow_torque)

    def calc_state(self):
        theta, self.theta_dot = self.central_joint.current_relative_position()
        self.gamma, self.gamma_dot = self.elbow_joint.current_relative_position()
        target_x, _ = self.jdict["target_x"].current_position()
        target_y, _ = self.jdict["target_y"].current_position()
        self.to_target_vec = np.array(self.fingertip.pose().xyz()) - np.array(self.target.pose().xyz())
        observation = [
            target_x,
            target_y,
            self.to_target_vec[0],
            self.to_target_vec[1],
            np.cos(theta),
            np.sin(theta),
            self.theta_dot,
            self.gamma,
            self.gamma_dot,
        ]
        return np.array(observation)

    def calc_potential(self):
        # Reward shaping: negative distance from fingertip to target, scaled by 100.
        return -100 * np.linalg.norm(self.to_target_vec)
48 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/manipulators/thrower.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
2 | import numpy as np
3 |
4 |
class Thrower(MJCFBasedRobot):
    """7-DOF arm that must throw a ball to a randomly placed goal."""
    # Sampling bounds (meters) for the random goal and ball placement at reset.
    min_target_placement_radius = 0.1
    max_target_placement_radius = 0.8
    min_object_placement_radius = 0.1
    max_object_placement_radius = 0.8

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'thrower.xml', 'body0', action_dim=7, obs_dim=48)

    def robot_specific_reset(self, bullet_client):
        """Cache part/joint handles, randomize the arm pose and place ball and goal."""
        # parts
        self.fingertip = self.parts["r_wrist_roll_link"]
        self.target = self.parts["goal"]
        self.object = self.parts["ball"]

        # joints
        self.shoulder_pan_joint = self.jdict["r_shoulder_pan_joint"]
        self.shoulder_lift_joint = self.jdict["r_shoulder_lift_joint"]
        self.upper_arm_roll_joint = self.jdict["r_upper_arm_roll_joint"]
        self.elbow_flex_joint = self.jdict["r_elbow_flex_joint"]
        self.forearm_roll_joint = self.jdict["r_forearm_roll_joint"]
        self.wrist_flex_joint = self.jdict["r_wrist_flex_joint"]
        self.wrist_roll_joint = self.jdict["r_wrist_roll_joint"]

        # Per-episode bookkeeping (set but not read here; presumably consumed by the env).
        self._object_hit_ground = False
        self._object_hit_location = None

        # reset position and speed of manipulator
        # TODO: Will this work or do we have to constrain this resetting in some way?
        self.shoulder_pan_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.shoulder_lift_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.elbow_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.forearm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.wrist_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
        self.wrist_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)

        # Offset recentering poses into the scene frame (z component is zero).
        self.zero_offset = np.array([0.45, 0.55, 0])
        # Random 3D direction for the ball.
        self.object_pos = np.concatenate([
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1)
        ])

        # make length of vector between min and max_object_placement_radius
        self.object_pos = self.object_pos \
                          / np.linalg.norm(self.object_pos) \
                          * self.np_random.uniform(low=self.min_object_placement_radius,
                                                   high=self.max_object_placement_radius, size=1)

        # reset object position (identity quaternion orientation)
        self.parts["ball"].reset_pose(self.object_pos - self.zero_offset, np.array([0, 0, 0, 1]))

        # Random 3D direction for the goal.
        self.target_pos = np.concatenate([
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1),
            self.np_random.uniform(low=-1, high=1, size=1)
        ])

        # make length of vector between min and max_target_placement_radius
        self.target_pos = self.target_pos \
                          / np.linalg.norm(self.target_pos) \
                          * self.np_random.uniform(low=self.min_target_placement_radius,
                                                   high=self.max_target_placement_radius, size=1)

        self.parts["goal"].reset_pose(self.target_pos - self.zero_offset, np.array([0, 0, 0, 1]))

    def apply_action(self, a):
        """Apply small clipped torques (scale 0.05) to the seven arm joints."""
        assert (np.isfinite(a).all())
        self.shoulder_pan_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1)))
        self.shoulder_lift_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1)))
        self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[2], -1, +1)))
        self.elbow_flex_joint.set_motor_torque(0.05 * float(np.clip(a[3], -1, +1)))
        self.forearm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[4], -1, +1)))
        self.wrist_flex_joint.set_motor_torque(0.05 * float(np.clip(a[5], -1, +1)))
        self.wrist_roll_joint.set_motor_torque(0.05 * float(np.clip(a[6], -1, +1)))

    def calc_state(self):
        """Observation: joint states, ball-to-goal vector and key part positions."""
        self.to_target_vec = self.target_pos - self.object_pos
        return np.concatenate([
            np.array([j.current_position() for j in self.ordered_joints]).flatten(),  # all positions
            np.array([j.current_relative_position() for j in self.ordered_joints]).flatten(),  # scaled positions and speeds (pos/vel pairs)
            self.to_target_vec,
            self.fingertip.pose().xyz(),
            self.object.pose().xyz(),
            self.target.pose().xyz(),
        ])
92 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/pendula/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/envs/roboschool/robots/pendula/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/pendula/interted_pendulum.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
2 | import numpy as np
3 |
4 |
class InvertedPendulum(MJCFBasedRobot):
    """Cart-pole: a pole hinged on a cart that slides along a rail."""
    swingup = False

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'inverted_pendulum.xml', 'cart', action_dim=1, obs_dim=5)

    def robot_specific_reset(self, bullet_client):
        self._p = bullet_client
        self.pole = self.parts["pole"]
        self.slider = self.jdict["slider"]
        self.j1 = self.jdict["hinge"]
        noise = self.np_random.uniform(low=-.1, high=.1)
        # Swingup variant starts hanging down (pi + noise); otherwise start upright.
        start_angle = 3.1415 + noise if self.swingup else noise
        self.j1.reset_current_position(start_angle, 0)
        self.j1.set_motor_torque(0)

    def apply_action(self, a):
        assert np.isfinite(a).all()
        # Paranoid fallback (only reachable with asserts disabled): zero a bad action.
        if not np.isfinite(a).all():
            print("a is inf")
            a[0] = 0
        torque = 100 * float(np.clip(a[0], -1, +1))
        self.slider.set_motor_torque(torque)

    def calc_state(self):
        self.theta, theta_dot = self.j1.current_position()
        x, vx = self.slider.current_position()
        assert np.isfinite(x)

        # Paranoid fallbacks: replace any non-finite reading with zero instead of
        # propagating it into the observation.
        if not np.isfinite(x):
            print("x is inf")
            x = 0
        if not np.isfinite(vx):
            print("vx is inf")
            vx = 0
        if not np.isfinite(self.theta):
            print("theta is inf")
            self.theta = 0
        if not np.isfinite(theta_dot):
            print("theta_dot is inf")
            theta_dot = 0

        return np.array([x, vx, np.cos(self.theta), np.sin(self.theta), theta_dot])
52 |
53 |
class InvertedPendulumSwingup(InvertedPendulum):
    # Same robot, but robot_specific_reset starts the pole hanging down (pi + noise).
    swingup = True
56 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/robots/pendula/inverted_double_pendulum.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.robot_bases import MJCFBasedRobot
2 | import numpy as np
3 |
4 |
class InvertedDoublePendulum(MJCFBasedRobot):
    """Cart with a two-link pole; only the cart slider is actuated."""

    def __init__(self):
        MJCFBasedRobot.__init__(self, 'inverted_double_pendulum.xml', 'cart', action_dim=1, obs_dim=9)

    def robot_specific_reset(self, bullet_client):
        self._p = bullet_client
        self.pole2 = self.parts["pole2"]
        self.slider = self.jdict["slider"]
        self.j1 = self.jdict["hinge"]
        self.j2 = self.jdict["hinge2"]
        # Small random perturbation on both hinges, then zero the motors.
        noise = self.np_random.uniform(low=-.1, high=.1, size=[2])
        self.j1.reset_current_position(float(noise[0]), 0)
        self.j2.reset_current_position(float(noise[1]), 0)
        self.j1.set_motor_torque(0)
        self.j2.set_motor_torque(0)

    def apply_action(self, a):
        assert np.isfinite(a).all()
        force = 200 * float(np.clip(a[0], -1, +1))
        self.slider.set_motor_torque(force)

    def calc_state(self):
        theta, theta_dot = self.j1.current_position()
        gamma, gamma_dot = self.j2.current_position()
        x, vx = self.slider.current_position()
        self.pos_x, _, self.pos_y = self.pole2.pose().xyz()
        assert np.isfinite(x)
        observation = [
            x, vx,
            self.pos_x,
            np.cos(theta), np.sin(theta), theta_dot,
            np.cos(gamma), np.sin(gamma), gamma_dot,
        ]
        return np.array(observation)
37 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/scenes/__init__.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.scenes.stadium import StadiumScene
2 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/scenes/scene_bases.py:
--------------------------------------------------------------------------------
1 | import sys, os
2 | sys.path.append(os.path.dirname(__file__))
3 | import pybullet as p
4 |
5 | import gym
6 |
7 |
class Scene:
    """Base class for single- and multiplayer scenes."""

    def __init__(self, bullet_client, gravity, timestep, frame_skip):
        self._p = bullet_client
        self.np_random, seed = gym.utils.seeding.np_random(None)
        self.timestep = timestep
        self.frame_skip = frame_skip
        self.dt = self.timestep * self.frame_skip
        self.cpp_world = World(self._p, gravity, timestep, frame_skip)

        self.test_window_still_open = True  # or never opened
        self.human_render_detected = False  # if user wants render("human"), we open test window

        self.multiplayer_robots = {}

    def test_window(self):
        """Call this every frame to watch what's going on; not needed for learning."""
        self.human_render_detected = True
        return self.test_window_still_open

    def actor_introduce(self, robot):
        """Register a robot with the scene; usually called right after scene reset."""
        if not self.multiplayer:
            return
        self.multiplayer_robots[robot.player_n] = robot

    def actor_is_active(self, robot):
        """Whether this robot may exclusively put its HUD on the test window.

        Later can be used for click-focus robots.
        """
        return not self.multiplayer

    def episode_restart(self, bullet_client):
        """Overridden by concrete scenes to reset their objects to start positions."""
        self.cpp_world.clean_everything()
        # self.cpp_world.test_window_history_reset()

    def global_step(self):
        """Advance physics one step.

        The idea is: apply motor torques for all robots, then call global_step(),
        then collect observations from robots using step() with the same action.
        """
        self.cpp_world.step(self.frame_skip)
53 |
class SingleRobotEmptyScene(Scene):
    """Scene holding a single robot, so no multiplayer bookkeeping is needed."""
    multiplayer = False  # this class is used "as is" for InvertedPendulum, Reacher
56 |
57 |
class World:
    """Thin wrapper around a bullet client that owns the physics configuration."""

    def __init__(self, bullet_client, gravity, timestep, frame_skip):
        self._p = bullet_client
        self.gravity = gravity
        self.timestep = timestep
        self.frame_skip = frame_skip
        self.numSolverIterations = 5
        self.clean_everything()

    def clean_everything(self):
        """(Re)apply gravity, contact and stepping parameters to the client."""
        # p.resetSimulation()
        self._p.setGravity(0, 0, -self.gravity)
        self._p.setDefaultContactERP(0.9)
        # print("self.numSolverIterations=",self.numSolverIterations)
        self._p.setPhysicsEngineParameter(
            fixedTimeStep=self.timestep * self.frame_skip,
            numSolverIterations=self.numSolverIterations,
            numSubSteps=self.frame_skip)

    def step(self, frame_skip):
        # One stepSimulation call; substepping is handled by numSubSteps above,
        # so the frame_skip argument is intentionally unused here.
        self._p.stepSimulation()
77 |
78 |
79 |
--------------------------------------------------------------------------------
/pybulletgym/envs/roboschool/scenes/stadium.py:
--------------------------------------------------------------------------------
1 | import os, inspect
2 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
3 | parentdir = os.path.dirname(currentdir)
4 | os.sys.path.insert(0,parentdir)
5 |
6 | from .scene_bases import Scene
7 | import pybullet
8 |
9 |
class StadiumScene(Scene):
    """Ground scene backed by the stadium plane SDF, loaded lazily on first restart."""
    multiplayer = False
    zero_at_running_strip_start_line = True   # if False, center of coordinates (0,0,0) will be at the middle of the stadium
    stadium_halflen = 105*0.25    # FOOTBALL_FIELD_HALFLEN
    stadium_halfwidth = 50*0.25     # FOOTBALL_FIELD_HALFWID
    # NOTE(review): class-level flag shadowed by the instance assignment below, so the
    # SDF is loaded once per instance — confirm whether once-per-process was intended.
    stadiumLoaded = 0

    def episode_restart(self, bullet_client):
        """Reset the world; on the first call also load and style the ground plane."""
        self._p = bullet_client
        Scene.episode_restart(self, bullet_client)
        if self.stadiumLoaded == 0:
            self.stadiumLoaded = 1

            # stadium_pose = cpp_household.Pose()
            # if self.zero_at_running_strip_start_line:
            #    stadium_pose.set_xyz(27, 21, 0)  # see RUN_STARTLINE, RUN_RAD constants

            filename = os.path.join(os.path.dirname(__file__), "..", "..", "assets", "scenes", "stadium", "plane_stadium.sdf")
            self.ground_plane_mjcf=self._p.loadSDF(filename)
            #filename = os.path.join(pybullet_data.getDataPath(),"stadium_no_collision.sdf")
            #self.ground_plane_mjcf = p.loadSDF(filename)
            #
            for i in self.ground_plane_mjcf:
                # Moderate friction, some restitution, semi-transparent white plane.
                self._p.changeDynamics(i,-1,lateralFriction=0.8, restitution=0.5)
                self._p.changeVisualShape(i,-1,rgbaColor=[1,1,1,0.8])
                self._p.configureDebugVisualizer(pybullet.COV_ENABLE_PLANAR_REFLECTION,1)

            # for j in range(pybullet.getNumJoints(i)):
            #     self._p.changeDynamics(i,j,lateralFriction=0)
            # despite the name (stadium_no_collision), it DID have collision, so don't add duplicate ground
--------------------------------------------------------------------------------
/pybulletgym/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/examples/roboschool-weights/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/roboschool-weights/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "ppo-mujoco-invdpendulumv2-58222240"
2 | all_model_checkpoint_paths: "ppo-mujoco-invdpendulumv2-58011633"
3 | all_model_checkpoint_paths: "ppo-mujoco-invdpendulumv2-58059089"
4 | all_model_checkpoint_paths: "ppo-mujoco-invdpendulumv2-58112398"
5 | all_model_checkpoint_paths: "ppo-mujoco-invdpendulumv2-58160617"
6 | all_model_checkpoint_paths: "ppo-mujoco-invdpendulumv2-58222240"
7 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.data-00000-of-00001
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.index
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58011633.meta
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.data-00000-of-00001
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.index
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58059089.meta
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.data-00000-of-00001
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.index
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58112398.meta
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.data-00000-of-00001
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.index
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58160617.meta
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.data-00000-of-00001
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.index
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invdpendulumv2/ppo-mujoco-invdpendulumv2-58222240.meta
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "ppo-mujoco-invpendulumv2-12789838"
2 | all_model_checkpoint_paths: "ppo-mujoco-invpendulumv2-12475216"
3 | all_model_checkpoint_paths: "ppo-mujoco-invpendulumv2-12555996"
4 | all_model_checkpoint_paths: "ppo-mujoco-invpendulumv2-12635251"
5 | all_model_checkpoint_paths: "ppo-mujoco-invpendulumv2-12706882"
6 | all_model_checkpoint_paths: "ppo-mujoco-invpendulumv2-12789838"
7 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.data-00000-of-00001
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.index
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12475216.meta
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.data-00000-of-00001
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.index
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12555996.meta
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.data-00000-of-00001
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.index
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12635251.meta
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.data-00000-of-00001
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.index
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12706882.meta
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.data-00000-of-00001
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.index
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/examples/tensorforce/checkpoints/mujoco-invpendulumv2/ppo-mujoco-invpendulumv2-12789838.meta
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/cnn_dqn2013_network.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "type": "conv2d",
4 | "size": 16,
5 | "window": 8,
6 | "stride": 4,
7 | "padding": "VALID"
8 | },
9 | {
10 | "type": "conv2d",
11 | "size": 32,
12 | "window": 4,
13 | "stride": 2,
14 | "padding": "VALID"
15 | },
16 | {
17 | "type": "flatten"
18 | },
19 | {
20 | "type": "dense",
21 | "size": 256,
22 | "activation": "relu"
23 | }
24 | ]
25 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/cnn_dqn_network.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "type": "conv2d",
4 | "size": 32,
5 | "window": 8,
6 | "stride": 4
7 | },
8 | {
9 | "type": "conv2d",
10 | "size": 64,
11 | "window": 4,
12 | "stride": 2
13 | },
14 | {
15 | "type": "conv2d",
16 | "size": 64,
17 | "window": 3,
18 | "stride": 1
19 | },
20 | {
21 | "type": "flatten"
22 | },
23 | {
24 | "type": "dense",
25 | "size": 512
26 | }
27 | ]
28 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/cnn_network_2048.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "type": "conv2d",
4 | "size": 64,
5 | "window": 2,
6 | "stride": 1,
7 | "padding": "VALID"
8 | },
9 | {
10 | "type": "conv2d",
11 | "size": 64,
12 | "window": 2,
13 | "stride": 1,
14 | "padding": "VALID"
15 | },
16 | {
17 | "type": "flatten"
18 | },
19 | {
20 | "type": "dense",
21 | "size": 64,
22 | "activation": "relu"
23 | }
24 | ]
25 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/ddpg.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "ddpg_agent",
3 |
4 | "update_mode": {
5 | "unit": "timesteps",
6 | "batch_size": 64,
7 | "frequency": 64
8 | },
9 | "memory": {
10 | "type": "replay",
11 | "capacity": 100000,
12 | "include_next_states": true
13 | },
14 |
15 | "optimizer": {
16 | "type": "adam",
17 | "learning_rate": 1e-4
18 | },
19 |
20 | "discount": 0.99,
21 | "entropy_regularization": null,
22 |
23 | "critic_network": {
24 | "size_t0": 64,
25 | "size_t1": 64
26 | },
27 | "critic_optimizer": {
28 | "type": "adam",
29 | "learning_rate": 1e-3
30 | },
31 | "target_sync_frequency": 1,
32 | "target_update_weight": 0.999,
33 |
34 | "actions_exploration": {
35 | "type": "ornstein_uhlenbeck",
36 | "sigma": 0.3,
37 | "mu": 0.0,
38 | "theta": 0.15
39 | },
40 |
41 | "saver": {
42 | "directory": null,
43 | "seconds": 600
44 | },
45 | "summarizer": {
46 | "directory": null,
47 | "labels": [],
48 | "seconds": 120
49 | },
50 | "execution": {
51 | "type": "single",
52 | "session_config": null,
53 | "distributed_spec": null
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/dqfd.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "dqfd_agent",
3 |
4 | "update_mode": {
5 | "unit": "timesteps",
6 | "batch_size": 64,
7 | "frequency": 4
8 | },
9 | "memory": {
10 | "type": "replay",
11 | "capacity": 10000,
12 | "include_next_states": true
13 | },
14 |
15 | "optimizer": {
16 | "type": "adam",
17 | "learning_rate": 1e-3
18 | },
19 |
20 | "discount": 0.99,
21 | "entropy_regularization": null,
22 |
23 | "target_sync_frequency": 1000,
24 | "target_update_weight": 1.0,
25 |
26 | "actions_exploration": {
27 | "type": "epsilon_decay",
28 | "initial_epsilon": 0.5,
29 | "final_epsilon": 0.0,
30 | "timesteps": 10000
31 | },
32 |
33 | "saver": {
34 | "directory": null,
35 | "seconds": 600
36 | },
37 | "summarizer": {
38 | "directory": null,
39 | "labels": [],
40 | "seconds": 120
41 | },
42 | "execution": {
43 | "type": "single",
44 | "session_config": null,
45 | "distributed_spec": null
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/dqn.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "dqn_agent",
3 |
4 | "update_mode": {
5 | "unit": "timesteps",
6 | "batch_size": 64,
7 | "frequency": 4
8 | },
9 | "memory": {
10 | "type": "replay",
11 | "capacity": 10000,
12 | "include_next_states": true
13 | },
14 |
15 | "optimizer": {
16 | "type": "clipped_step",
17 | "clipping_value": 0.1,
18 | "optimizer": {
19 | "type": "adam",
20 | "learning_rate": 1e-3
21 | }
22 | },
23 |
24 | "discount": 0.99,
25 | "entropy_regularization": null,
26 | "double_q_model": true,
27 |
28 | "target_sync_frequency": 1000,
29 | "target_update_weight": 1.0,
30 |
31 | "actions_exploration": {
32 | "type": "epsilon_anneal",
33 | "initial_epsilon": 0.5,
34 | "final_epsilon": 0.0,
35 | "timesteps": 10000
36 | },
37 |
38 | "saver": {
39 | "directory": null,
40 | "seconds": 600
41 | },
42 | "summarizer": {
43 | "directory": null,
44 | "labels": [],
45 | "seconds": 120
46 | },
47 | "execution": {
48 | "type": "single",
49 | "session_config": null,
50 | "distributed_spec": null
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/dqn_ue4.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "dqn_agent",
3 |
4 | "update_mode": {
5 | "unit": "timesteps",
6 | "batch_size": 64,
7 | "frequency": 4
8 | },
9 |
10 | "memory": {
11 | "type": "replay",
12 | "capacity": 10000,
13 | "include_next_states": true
14 | },
15 |
16 | "optimizer": {
17 | "type": "adam",
18 | "learning_rate": 1e-3
19 | },
20 |
21 | "discount": 0.97,
22 |
23 | "states_preprocessing": [
24 | {
25 | "type": "divide",
26 | "scale": 255
27 | },
28 | {
29 | "type": "sequence",
30 | "length": 4,
31 | "add_rank": true
32 | }
33 | ],
34 |
35 | "actions_exploration": {
36 | "type": "epsilon_decay",
37 | "initial_epsilon": 1.0,
38 | "final_epsilon": 0.1,
39 | "timesteps": 100000
40 | },
41 |
42 | "saver": {
43 | "directory": null,
44 | "seconds": 600
45 | },
46 |
47 | "summarizer": {
48 | "directory": null,
49 | "labels": [],
50 | "seconds": 120
51 | },
52 | "execution": {
53 | "type": "single",
54 | "session_config": null,
55 | "distributed_spec": null
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/dqn_visual.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "dqn_agent",
3 |
4 | "update_mode": {
5 | "unit": "timesteps",
6 | "batch_size": 64,
7 | "frequency": 4
8 | },
9 |
10 | "memory": {
11 | "type": "replay",
12 | "capacity": 10000,
13 | "include_next_states": true
14 | },
15 |
16 | "optimizer": {
17 | "type": "adam",
18 | "learning_rate": 1e-3
19 | },
20 |
21 | "discount": 0.97,
22 |
23 | "states_preprocessing": [
24 | {
25 | "type": "image_resize",
26 | "width": 84,
27 | "height": 84
28 | },
29 | {
30 | "type": "grayscale"
31 | },
32 | {
33 | "type": "divide",
34 | "scale": 255
35 | }
36 | ],
37 |
38 | "actions_exploration": {
39 | "type": "epsilon_decay",
40 | "initial_epsilon": 1.0,
41 | "final_epsilon": 0.1,
42 | "timesteps": 100000
43 | },
44 |
45 | "saver": {
46 | "directory": null,
47 | "seconds": 600
48 | },
49 |
50 | "summarizer": {
51 | "directory": null,
52 | "labels": [],
53 | "seconds": 120
54 | },
55 | "execution": {
56 | "type": "single",
57 | "session_config": null,
58 | "distributed_spec": null
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/mlp2_embedding_network.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "type": "embedding",
4 | "indices": 100,
5 | "size": 32
6 | },
7 | {
8 | "type": "dense",
9 | "size": 32
10 | },
11 | {
12 | "type": "dense",
13 | "size": 32
14 | }
15 | ]
16 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/mlp2_lstm_network.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "type": "dense",
4 | "size": 32
5 | },
6 | {
7 | "type": "dense",
8 | "size": 32
9 | },
10 | {
11 | "type": "internal_lstm",
12 | "size": 32
13 | }
14 | ]
15 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/mlp2_network.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "type": "dense",
4 | "size": 32,
5 | "activation": "relu"
6 | },
7 | {
8 | "type": "dense",
9 | "size": 32,
10 | "activation": "relu"
11 | }
12 | ]
13 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/mlp2_normalized_network.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "type": "linear",
4 | "size": 64
5 | },
6 | {
7 | "type": "tf_layer",
8 | "layer": "batch_normalization"
9 | },
10 | {
11 | "type": "nonlinearity",
12 | "name": "relu"
13 | },
14 |
15 |
16 | {
17 | "type": "linear",
18 | "size": 64
19 | },
20 | {
21 | "type": "tf_layer",
22 | "layer": "batch_normalization"
23 | },
24 | {
25 | "type": "nonlinearity",
26 | "name": "relu"
27 | },
28 |
29 | {
30 | "type": "dense",
31 | "size": 64,
32 | "activation": null
33 | }
34 | ]
35 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/naf.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "naf_agent",
3 |
4 | "update_mode": {
5 | "unit": "timesteps",
6 | "batch_size": 64,
7 | "frequency": 4
8 | },
9 | "memory": {
10 | "type": "replay",
11 | "capacity": 10000,
12 | "include_next_states": true
13 | },
14 |
15 | "optimizer": {
16 | "type": "adam",
17 | "learning_rate": 1e-3
18 | },
19 |
20 | "discount": 0.99,
21 | "entropy_regularization": null,
22 | "double_q_model": true,
23 |
24 | "target_sync_frequency": 1000,
25 | "target_update_weight": 1.0,
26 |
27 | "actions_exploration": {
28 | "type": "ornstein_uhlenbeck",
29 | "sigma": 0.2,
30 | "mu": 0.0,
31 | "theta": 0.15
32 | },
33 |
34 | "saver": {
35 | "directory": null,
36 | "seconds": 600
37 | },
38 | "summarizer": {
39 | "directory": null,
40 | "labels": [],
41 | "seconds": 120
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/ppo.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "ppo_agent",
3 |
4 | "update_mode": {
5 | "unit": "episodes",
6 | "batch_size": 10,
7 | "frequency": 10
8 | },
9 | "memory": {
10 | "type": "latest",
11 | "include_next_states": false,
12 | "capacity": 5000
13 | },
14 |
15 | "step_optimizer": {
16 | "type": "adam",
17 | "learning_rate": 1e-3
18 | },
19 | "subsampling_fraction": 0.1,
20 | "optimization_steps": 50,
21 |
22 | "discount": 0.99,
23 | "entropy_regularization": 0.01,
24 | "gae_lambda": null,
25 | "likelihood_ratio_clipping": 0.2,
26 |
27 | "baseline_mode": "states",
28 | "baseline": {
29 | "type": "mlp",
30 | "sizes": [32, 32]
31 | },
32 | "baseline_optimizer": {
33 | "type": "multi_step",
34 | "optimizer": {
35 | "type": "adam",
36 | "learning_rate": 1e-3
37 | },
38 | "num_steps": 5
39 | },
40 |
41 | "saver": {
42 | "directory": null,
43 | "seconds": 600
44 | },
45 | "summarizer": {
46 | "directory": null,
47 | "labels": [],
48 | "seconds": 120
49 | },
50 | "execution": {
51 | "type": "single",
52 | "session_config": null,
53 | "distributed_spec": null
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/ppo_cnn.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "ppo_agent",
3 |
4 | "update_mode": {
5 | "unit": "episodes",
6 | "batch_size": 10,
7 | "frequency": 10
8 | },
9 | "memory": {
10 | "type": "latest",
11 | "include_next_states": false,
12 | "capacity": 5000
13 | },
14 |
15 | "step_optimizer": {
16 | "type": "adam",
17 | "learning_rate": 1e-3
18 | },
19 | "subsampling_fraction": 0.1,
20 | "optimization_steps": 50,
21 |
22 | "discount": 0.99,
23 | "entropy_regularization": 0.01,
24 | "gae_lambda": null,
25 | "likelihood_ratio_clipping": 0.2,
26 |
27 | "baseline_mode": "states",
28 | "baseline": {
29 | "type": "cnn",
30 | "conv_sizes": [32, 32],
31 | "dense_sizes": [32]
32 | },
33 | "baseline_optimizer": {
34 | "type": "multi_step",
35 | "optimizer": {
36 | "type": "adam",
37 | "learning_rate": 1e-3
38 | },
39 | "num_steps": 5
40 | },
41 |
42 | "saver": {
43 | "directory": null,
44 | "seconds": 600
45 | },
46 | "summarizer": {
47 | "directory": null,
48 | "labels": [],
49 | "seconds": 120
50 | },
51 | "execution": {
52 | "type": "single",
53 | "session_config": null,
54 | "distributed_spec": null
55 | }
56 | }
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/trpo.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "trpo_agent",
3 |
4 | "update_mode": {
5 | "unit": "episodes",
6 | "batch_size": 20,
7 | "frequency": 20
8 | },
9 | "memory": {
10 | "type": "latest",
11 | "include_next_states": false,
12 | "capacity": 5000
13 | },
14 |
15 | "learning_rate": 1e-2,
16 |
17 | "discount": 0.99,
18 | "entropy_regularization": null,
19 | "gae_lambda": null,
20 | "likelihood_ratio_clipping": null,
21 |
22 | "baseline_mode": null,
23 | "baseline": null,
24 | "baseline_optimizer": null,
25 |
26 | "saver": {
27 | "directory": null,
28 | "seconds": 600
29 | },
30 | "summarizer": {
31 | "directory": null,
32 | "labels": [],
33 | "seconds": 120
34 | },
35 | "execution": {
36 | "type": "single",
37 | "session_config": null,
38 | "distributed_spec": null
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/vpg.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "vpg_agent",
3 |
4 | "update_mode": {
5 | "unit": "episodes",
6 | "batch_size": 20,
7 | "frequency": 20
8 | },
9 | "memory": {
10 | "type": "latest",
11 | "include_next_states": false,
12 | "capacity": 5000
13 | },
14 |
15 | "optimizer": {
16 | "type": "adam",
17 | "learning_rate": 2e-2
18 | },
19 |
20 | "discount": 0.99,
21 | "entropy_regularization": null,
22 | "gae_lambda": null,
23 |
24 | "baseline_mode": "states",
25 | "baseline": {
26 | "type": "mlp",
27 | "sizes": [32, 32]
28 | },
29 | "baseline_optimizer": {
30 | "type": "multi_step",
31 | "optimizer": {
32 | "type": "adam",
33 | "learning_rate": 1e-3
34 | },
35 | "num_steps": 5
36 | },
37 |
38 | "saver": {
39 | "directory": null,
40 | "seconds": 600
41 | },
42 | "summarizer": {
43 | "directory": null,
44 | "labels": [],
45 | "seconds": 120
46 | },
47 | "execution": {
48 | "type": "single",
49 | "session_config": null,
50 | "distributed_spec": null
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/pybulletgym/examples/tensorforce/configs/vpg_baseline_visual.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "vpg_agent",
3 | "batch_size": 4000,
4 | "optimizer": {
5 | "type": "adam",
6 | "learning_rate": 1e-2
7 | },
8 |
9 | "discount": 0.99,
10 | "entropy_regularization": null,
11 | "gae_lambda": 0.97,
12 |
13 | "baseline_mode": "states",
14 | "baseline": {
15 | "type": "mlp",
16 | "sizes": [32, 32]
17 | },
18 | "baseline_optimizer": {
19 | "type": "multi_step",
20 | "optimizer": {
21 | "type": "adam",
22 | "learning_rate": 1e-3
23 | },
24 | "num_steps": 5
25 | },
26 |
27 | "states_preprocessing": [
28 | {
29 | "type": "image_resize",
30 | "width": 84,
31 | "height": 84
32 | },
33 | {
34 | "type": "grayscale"
35 | },
36 | {
37 | "type": "center"
38 | },
39 | {
40 | "type": "sequence",
41 | "length": 4
42 | }
43 | ],
44 | "execution": {
45 | "type": "single",
46 | "session_config": null,
47 | "distributed_spec": null
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/pybulletgym/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/tests/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/tests/roboschool/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/tests/roboschool/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/tests/roboschool/agents/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/tests/roboschool/agents/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/tests/roboschool/agents/policies.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
def relu(x):
    """Rectified linear unit: clamp all negative entries of ``x`` to zero."""
    return np.clip(x, 0, None)
6 |
7 |
class SmallReactivePolicy:
    """Stateless three-layer MLP policy: two ReLU hidden layers, linear output."""

    def __init__(self, observation_space, action_space, weights, biases):
        # observation_space / action_space are accepted for interface
        # compatibility with other policy classes but are not used here.
        self.weights = weights
        self.biases = biases

    def act(self, ob):
        """Map an observation vector to an action via the fixed 3-layer MLP."""
        w, b = self.weights, self.biases
        hidden = relu(np.dot(ob, w[0]) + b[0])
        hidden = relu(np.dot(hidden, w[1]) + b[1])
        # Output layer is linear (no activation).
        return np.dot(hidden, w[2]) + b[2]
20 |
--------------------------------------------------------------------------------
/pybulletgym/tests/test_pybulletgym_mujoco_sanity.py:
--------------------------------------------------------------------------------
"""Sanity-check every registered MuJoCo-flavoured pybullet-gym environment.

Each env is created, reset, and stepped once with a random action; any
exception marks the env as bugged, and all failures are reported at the end.
"""
import gym
import numpy as np
import traceback
import pybulletgym  # required for the Bullet envs to be initialized

# Collect every registered env whose id mentions 'MuJoCo'.
envs = [spec.id for spec in gym.envs.registry.all() if 'MuJoCo' in spec.id]
bugged_envs = []
for env_name in envs:
    try:
        print('[TESTING] ENV', env_name, '...')
        env = gym.make(env_name)
        env.reset()
        # One random step is enough to exercise the action/observation plumbing.
        env.step(np.random.random(env.action_space.shape))
        print('[SUCCESS] ENV', env_name, '\n')
    except Exception:
        # Keep going: record the failure and continue with the remaining envs.
        # (The bound exception variable was unused; the traceback is printed.)
        print(env_name, ': ', traceback.format_exc())
        bugged_envs.append(env_name)
        print('[FAIL] ENV', env_name, '\n')

print('The following envs have problems:', bugged_envs)
21 |
--------------------------------------------------------------------------------
/pybulletgym/tests/test_pybylletgym_roboschool_sanity.py:
--------------------------------------------------------------------------------
"""Sanity-check every registered Bullet (roboschool-flavoured) pybullet-gym env.

Each env is created, reset, and stepped once with a random action; any
exception marks the env as bugged, and all failures are reported at the end.
"""
import gym
import numpy as np
import traceback
import pybulletgym  # required for the Bullet envs to be initialized

# Collect every registered env whose id mentions 'Bullet'.
envs = [spec.id for spec in gym.envs.registry.all() if 'Bullet' in spec.id]
bugged_envs = []
for env_name in envs:
    try:
        print('[TESTING] ENV', env_name, '...')
        env = gym.make(env_name)
        env.reset()
        # One random step is enough to exercise the action/observation plumbing.
        env.step(np.random.random(env.action_space.shape))
        print('[SUCCESS] ENV', env_name, '\n')
    except Exception:
        # Keep going: record the failure and continue with the remaining envs.
        # (The bound exception variable was unused; the traceback is printed.)
        print(env_name, ': ', traceback.format_exc())
        bugged_envs.append(env_name)
        print('[FAIL] ENV', env_name, '\n')

print('The following envs have problems:', bugged_envs)
21 |
--------------------------------------------------------------------------------
/pybulletgym/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/pybulletgym/utils/__init__.py
--------------------------------------------------------------------------------
/pybulletgym/utils/kerasrl_utils.py:
--------------------------------------------------------------------------------
1 |
2 | import re
3 | from gym import error
4 | import glob
5 | # example match: checkpoints/KerasDDPG-v0-InvertedPendulum-v0-20170701190920_actor.h5
6 | weight_save_re = re.compile(r'^(?:\w+\/)+?(\w+-v\d+)-(\w+-v\d+)-(\d+)(?:_\w+)?\.(\w+)$')
7 |
def get_fields(weight_save_name):
    """Parse a weight-save filename into its identifying fields.

    :param weight_save_name: path of the form
        ``<dir>/<agent>-v<N>-<env>-v<M>-<timestamp>[_suffix].<ext>``
    :return: tuple of (agent id, env id, timestamp as int)
    :raises error.Error: if the filename does not match ``weight_save_re``
    """
    match = weight_save_re.search(weight_save_name)
    if not match:
        # Bug fix: the message previously formatted the `id` builtin instead of
        # the offending filename, making the error useless for debugging.
        raise error.Error('Attempted to read a malformed weight save: {}. (Currently all weight saves must be of the form {}.)'.format(weight_save_name, weight_save_re.pattern))
    return match.group(1), match.group(2), int(match.group(3))
13 |
def get_latest_save(file_folder, agent_name, env_name, version_number):
    """
    Scan ``file_folder`` for ``*.h5`` weight saves and return the field tuple
    (agent id, env id, timestamp) of the newest save matching the given agent
    and environment names. Returns [] when no matching save exists. The
    returned fields can be used to build the loading path.
    """
    newest = []
    for saved_file in glob.glob("%s%s" % (file_folder, "*.h5")):
        fields = get_fields(saved_file)
        if fields[0] != agent_name or fields[1] != env_name:
            continue
        # Keep the save with the largest (latest) timestamp seen so far.
        if not newest or fields[2] > newest[2]:
            newest = fields
    return newest
29 |
--------------------------------------------------------------------------------
/pybulletgym/utils/robot_dev_util.py:
--------------------------------------------------------------------------------
1 | from pybulletgym.envs.roboschool.robots.locomotors import Ant
2 | import gym.utils.seeding
3 | import numpy as np
4 | import pybullet as p
5 | import time
6 | from importlib import import_module
7 | import sys
8 |
9 |
if __name__ == "__main__":
    # Interactive debugging harness: load a robot class by dotted path
    # (e.g. `python robot_dev_util.py pybulletgym.envs...locomotors.Ant`)
    # and drive it with random actions in the pybullet GUI.
    physicsClient = p.connect(p.GUI)

    path, class_str = sys.argv[1].rsplit('.', 1)
    module = import_module(path)
    robot_class = getattr(module, class_str)

    robot = robot_class()
    np_random, seed = gym.utils.seeding.np_random()
    robot.np_random = np_random
    robot.reset(p)

    while True:
        try:
            # Sample a uniformly random action within the robot's action bounds.
            robot.apply_action(np.random.uniform(robot.action_space.low, robot.action_space.high, robot.action_space.shape))
            p.stepSimulation()
            time.sleep(1./240.)  # real-time pacing at pybullet's default 240 Hz
        except KeyboardInterrupt:
            # Bug fix: without `break`, the loop kept stepping a disconnected
            # client after Ctrl-C, raising pybullet errors forever.
            p.disconnect()
            break
30 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benelot/pybullet-gym/bc68201c8101c4e30dde95f425647a0709ee2f29/requirements.txt
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""Package pybulletgym, bundling every asset file under envs/assets."""
from setuptools import setup, find_packages
import sys, os.path

# Don't import gym module here, since deps may not be installed
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'pybulletgym'))

# Bug fix: setuptools expects the version to be a *string*; the previous bare
# float 0.1 only worked through accidental str() coercion (and newer
# setuptools rejects non-string versions outright).
VERSION = '0.1'

setup_py_dir = os.path.dirname(os.path.realpath(__file__))

need_files = []
datadir = "pybulletgym/envs/assets"

hh = setup_py_dir + "/" + datadir

# Asset file extensions that must ship inside the wheel/sdist.
ASSET_EXTENSIONS = frozenset('png gif jpg urdf sdf obj mtl dae off stl STL xml'.split())

for root, dirs, files in os.walk(hh):
    for fn in files:
        ext = os.path.splitext(fn)[1][1:]
        if ext in ASSET_EXTENSIONS:
            fn = root + "/" + fn
            # Store the path relative to the assets dir (strip "<hh>/").
            need_files.append(fn[1+len(hh):])

setup(name='pybulletgym',
      version=VERSION,
      description='PyBullet Gym is an open-source implementation of the OpenAI Gym MuJoCo environments for use with the OpenAI Gym Reinforcement Learning Research Platform in support of open research.',
      url='https://github.com/benelot/pybullet-gym',
      author='Benjamin Ellenberger',
      author_email='be.ellenberger@gmail.com',
      license='',
      packages=[package for package in find_packages()
                if package.startswith('pybulletgym')],
      zip_safe=False,
      install_requires=[
          'pybullet>=1.7.8',
      ],
      package_data={'pybulletgym': need_files},
)
38 |
--------------------------------------------------------------------------------