├── .gitignore ├── DO ├── __init__.py ├── design_optimization.py ├── pso_batch.py └── pso_sim.py ├── Environments ├── __init__.py ├── evoenvs.py └── pybullet_evo │ ├── README.md │ ├── __init__.py │ ├── gym_locomotion_envs.py │ ├── half_cheetah.xml │ ├── robot_bases.py │ └── robot_locomotors.py ├── LICENSE ├── README.md ├── RL ├── __init__.py ├── evoreplay.py ├── rl_algorithm.py └── soft_actor.py ├── __init__.py ├── coadapt.py ├── data_notebook.ipynb ├── experiment_configs.py ├── main.py ├── requirements.txt └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # Data 107 | data_*/ 108 | data_* 109 | -------------------------------------------------------------------------------- /DO/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ksluck/Coadaptation/c45d1d682262ef04cb797122d6103050b8d8b826/DO/__init__.py -------------------------------------------------------------------------------- /DO/design_optimization.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Design_Optimization(object): 4 | 5 | def __init__(self): 6 | pass 7 | 8 | def optimize_design(self): 9 | pass 10 | -------------------------------------------------------------------------------- /DO/pso_batch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import rlkit.torch.pytorch_util as ptu 4 | import pyswarms as ps 5 | from .design_optimization import Design_Optimization 6 | 7 | class PSO_batch(Design_Optimization): 8 | 9 | def __init__(self, config, replay, env): 10 | self._config = config 11 | self._replay = replay 12 | self._env = env 13 | 14 | if 'state_batch_size' in self._config.keys(): 
15 | self._state_batch_size = self._config['state_batch_size'] 16 | else: 17 | self._state_batch_size = 32 18 | 19 | 20 | def optimize_design(self, design, q_network, policy_network): 21 | self._replay.set_mode('start') 22 | initial_state = self._replay.random_batch(self._state_batch_size) 23 | initial_state = initial_state['observations'] 24 | design_idxs = self._env.get_design_dimensions() 25 | # initial_state = initial_state[:,:-len(design)] 26 | # state_tensor = torch.from_numpy(initial_state).to(device=ptu.device, dtype=torch.float32) 27 | 28 | # initial_design = np.array(self._current_design) 29 | # initial_design = np.array(design) 30 | 31 | def f_qval(x_input, **kwargs): 32 | shape = x_input.shape 33 | cost = np.zeros((shape[0],)) 34 | with torch.no_grad(): 35 | for i in range(shape[0]): 36 | x = x_input[i:i+1,:] 37 | # X = ( 38 | # torch.from_numpy(x) 39 | # .to(device=ptu.device, dtype=torch.float32) 40 | # .contiguous() 41 | # .requires_grad_(False) 42 | # ) 43 | # X_expand = X.expand(self._state_batch_size, -1) 44 | # network_input = torch.cat((state_tensor,X_expand), -1) 45 | state_batch = initial_state.copy() 46 | state_batch[:,design_idxs] = x 47 | network_input = torch.from_numpy(state_batch).to(device=ptu.device, dtype=torch.float32) 48 | action, _, _, _, _, _, _, _, = policy_network(network_input, deterministic=True) 49 | output = q_network(network_input, action) 50 | #output = self._vf_pop.forward(input) 51 | loss = -output.mean().sum() 52 | fval = float(loss.item()) 53 | cost[i] = fval 54 | return cost 55 | 56 | lower_bounds = [l for l, _ in self._env.design_params_bounds] 57 | lower_bounds = np.array(lower_bounds) 58 | upper_bounds = [u for _, u in self._env.design_params_bounds] 59 | upper_bounds = np.array(upper_bounds) 60 | bounds = (lower_bounds, upper_bounds) 61 | options = {'c1': 0.5, 'c2': 0.3, 'w':0.9} 62 | #options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9, 'k': 3, 'p': 2} 63 | optimizer = ps.single.GlobalBestPSO(n_particles=700, dimensions=len(design), bounds=bounds, options=options) 64 | 65 | # Perform optimization 66 | cost, new_design = optimizer.optimize(f_qval, print_step=100, iters=250, verbose=3) #, n_processes=2) 67 | return new_design 68 | -------------------------------------------------------------------------------- /DO/pso_sim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import rlkit.torch.pytorch_util as ptu 4 | import pyswarms as ps 5 | from .design_optimization import Design_Optimization 6 | 7 | class PSO_simulation(Design_Optimization): 8 | 9 | def __init__(self, config, replay, env): 10 | self._config = config 11 | self._replay = replay 12 | self._env = env 13 | 14 | # TODO Make this independent of rl_algo config 15 | self._episode_length = self._config['rl_algorithm_config']['algo_params']['num_steps_per_epoch'] 16 | self._reward_scale = self._config['rl_algorithm_config']['algo_params']['reward_scale'] 17 | 18 | 19 | def optimize_design(self, design, q_network, policy_network): 20 | # Important: We reset the design of the environment. 
Previous design 21 | # will be lost 22 | 23 | def get_reward_for_design(design): 24 | self._env.set_new_design(design) 25 | state = self._env.reset() 26 | reward_episode = [] 27 | done = False 28 | nmbr_of_steps = 0 29 | while not(done) and nmbr_of_steps <= self._episode_length: 30 | nmbr_of_steps += 1 31 | action, _ = policy_network.get_action(state, deterministic=True) 32 | new_state, reward, done, info = self._env.step(action) 33 | reward = reward * self._reward_scale 34 | reward_episode.append(float(reward)) 35 | state = new_state 36 | reward_mean = np.mean(reward_episode) 37 | return reward_mean 38 | 39 | def f_qval(x_input, **kwargs): 40 | shape = x_input.shape 41 | cost = np.zeros((shape[0],)) 42 | for i in range(shape[0]): 43 | x = x_input[i,:] 44 | reward = get_reward_for_design(x) 45 | cost[i] = -reward 46 | return cost 47 | 48 | lower_bounds = [l for l, _ in self._env.design_params_bounds] 49 | lower_bounds = np.array(lower_bounds) 50 | upper_bounds = [u for _, u in self._env.design_params_bounds] 51 | upper_bounds = np.array(upper_bounds) 52 | bounds = (lower_bounds, upper_bounds) 53 | options = {'c1': 0.5, 'c2': 0.3, 'w':0.9} 54 | optimizer = ps.single.GlobalBestPSO(n_particles=35, dimensions=len(design), bounds=bounds, options=options) 55 | 56 | # Perform optimization 57 | cost, new_design = optimizer.optimize(f_qval, print_step=100, iters=30, verbose=3) 58 | return new_design 59 | -------------------------------------------------------------------------------- /Environments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ksluck/Coadaptation/c45d1d682262ef04cb797122d6103050b8d8b826/Environments/__init__.py -------------------------------------------------------------------------------- /Environments/evoenvs.py: -------------------------------------------------------------------------------- 1 | from gym import spaces 2 | import numpy as np 3 | from .pybullet_evo.gym_locomotion_envs import HalfCheetahBulletEnv 4 | import copy 5 | from utils import BestEpisodesVideoRecorder 6 | 7 | class HalfCheetahEnv(object): 8 | def __init__(self, config = {'env' : {'render' : True, 'record_video': False}}): 9 | self._config = config 10 | self._render = self._config['env']['render'] 11 | self._record_video = self._config['env']['record_video'] 12 | self._current_design = [1.0] * 6 13 | self._config_numpy = np.array(self._current_design) 14 | self.design_params_bounds = [(0.8, 2.0)] * 6 15 | self._env = HalfCheetahBulletEnv(render=self._render, design=self._current_design) 16 | self.init_sim_params = [ 17 | [1.0] * 6, 18 | [1.41, 0.96, 1.97, 1.73, 1.97, 1.17], 19 | [1.52, 1.07, 1.11, 1.97, 1.51, 0.99], 20 | [1.08, 1.18, 1.39, 1.76 , 1.85, 0.92], 21 | [0.85, 1.54, 0.97, 1.38, 1.10, 1.49], 22 | ] 23 | self.observation_space = spaces.Box(-np.inf, np.inf, shape=[self._env.observation_space.shape[0] + 6], dtype=np.float32)#env.observation_space 24 | self.action_space = self._env.action_space 25 | self._initial_state = self._env.reset() 26 | 27 | if self._record_video: 28 | self._video_recorder = BestEpisodesVideoRecorder(path=config['data_folder_experiment'], max_videos=5) 29 | 30 | # Which dimensions in the state vector are design parameters? 
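# Note: step() and reset() below append the current design vector to the raw simulator
# state, so the design parameters occupy the last len(self._current_design) entries of
# the observation; that is exactly the index range collected here.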
31 | self._design_dims = list(range(self.observation_space.shape[0] - len(self._current_design), self.observation_space.shape[0])) 32 | assert len(self._design_dims) == 6 33 | 34 | def render(self): 35 | pass 36 | 37 | def step(self, a): 38 | info = {} 39 | state, reward, done, _ = self._env.step(a) 40 | state = np.append(state, self._config_numpy) 41 | info['orig_action_cost'] = 0.1 * np.mean(np.square(a)) 42 | info['orig_reward'] = reward 43 | 44 | if self._record_video: 45 | self._video_recorder.step(env=self._env, state=state, reward=reward, done=done) 46 | 47 | return state, reward, False, info 48 | 49 | 50 | def reset(self): 51 | state = self._env.reset() 52 | self._initial_state = state 53 | state = np.append(state, self._config_numpy) 54 | 55 | if self._record_video: 56 | self._video_recorder.reset(env=self._env, state=state, reward=0, done=False) 57 | 58 | return state 59 | 60 | def set_new_design(self, vec): 61 | self._env.reset_design(vec) 62 | self._current_design = vec 63 | self._config_numpy = np.array(vec) 64 | 65 | if self._record_video: 66 | self._video_recorder.increase_folder_counter() 67 | 68 | def get_random_design(self): 69 | optimized_params = np.random.uniform(low=0.8, high=2.0, size=6) 70 | return optimized_params 71 | 72 | def get_current_design(self): 73 | return copy.copy(self._current_design) 74 | 75 | def get_design_dimensions(self): 76 | return copy.copy(self._design_dims) 77 | -------------------------------------------------------------------------------- /Environments/pybullet_evo/README.md: -------------------------------------------------------------------------------- 1 | The files in this directory belong to the pybullet project and were changed to enable the automatic design adaptation of agents during the training process. 
2 | 3 | You can find the original files here: https://github.com/bulletphysics/bullet3/tree/master/examples/pybullet/gym 4 | -------------------------------------------------------------------------------- /Environments/pybullet_evo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ksluck/Coadaptation/c45d1d682262ef04cb797122d6103050b8d8b826/Environments/pybullet_evo/__init__.py -------------------------------------------------------------------------------- /Environments/pybullet_evo/gym_locomotion_envs.py: -------------------------------------------------------------------------------- 1 | from pybullet_envs.scene_stadium import SinglePlayerStadiumScene 2 | from pybullet_envs.env_bases import MJCFBaseBulletEnv 3 | import numpy as np 4 | import pybullet 5 | from .robot_locomotors import HalfCheetah 6 | import os, inspect 7 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 8 | from gym import spaces 9 | 10 | class WalkerBaseBulletEnv(MJCFBaseBulletEnv): 11 | def __init__(self, robot, render=False): 12 | # print("WalkerBase::__init__ start") 13 | MJCFBaseBulletEnv.__init__(self, robot, render) 14 | 15 | self.camera_x = 0 16 | self.walk_target_x = 1e3 # kilometer away 17 | self.walk_target_y = 0 18 | self.stateId=-1 19 | self._projectM = None 20 | self._param_init_camera_width = 320 21 | self._param_init_camera_height = 200 22 | self._param_camera_distance = 2.0 23 | 24 | 25 | def create_single_player_scene(self, bullet_client): 26 | self.stadium_scene = SinglePlayerStadiumScene(bullet_client, gravity=9.8, timestep=0.0165/4, frame_skip=4) 27 | return self.stadium_scene 28 | 29 | def reset(self): 30 | if (self.stateId>=0): 31 | # print("restoreState self.stateId:",self.stateId) 32 | self._p.restoreState(self.stateId) 33 | 34 | r = MJCFBaseBulletEnv.reset(self) 35 | self._p.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING,0) 36 | 37 | self.parts, self.jdict, self.ordered_joints, self.robot_body = self.robot.addToScene(self._p, 38 | self.stadium_scene.ground_plane_mjcf) 39 | # self.ground_ids = set([(self.parts[f].bodies[self.parts[f].bodyIndex], self.parts[f].bodyPartIndex) for f in 40 | # self.foot_ground_object_names]) 41 | self._p.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING,1) 42 | if (self.stateId<0): 43 | self.stateId=self._p.saveState() 44 | #print("saving state self.stateId:",self.stateId) 45 | self._p.setGravity(0,0,-.5) 46 | for _ in range(200): 47 | self.robot.reset_position() 48 | self.scene.global_step() 49 | self.robot.reset_position_final() 50 | self._p.setGravity(0,0,-9.81) 51 | r = self.robot.calc_state() 52 | self.robot._initial_z = r[-1] 53 | # for _ in range(20): 54 | # self.scene.global_step() 55 | # self.robot.reset_position_final() 56 | # time.sleep(0.1) 57 | # self.robot.reset_position() 58 | # self.scene.global_step() 59 | self.robot.initial_z = None 60 | r = self.robot.calc_state() 61 | 62 | return r 63 | 64 | def _isDone(self): 65 | return self._alive < 0 66 | 67 | def move_robot(self, init_x, init_y, init_z): 68 | "Used by multiplayer stadium to move sideways, to another running lane." 
69 | self.cpp_robot.query_position() 70 | pose = self.cpp_robot.root_part.pose() 71 | pose.move_xyz(init_x, init_y, init_z) # Works because robot loads around (0,0,0), and some robots have z != 0 that is left intact 72 | self.cpp_robot.set_pose(pose) 73 | 74 | electricity_cost = -2.0 # cost for using motors -- this parameter should be carefully tuned against reward for making progress, other values less improtant 75 | stall_torque_cost = -0.1 # cost for running electric current through a motor even at zero rotational speed, small 76 | foot_collision_cost = -1.0 # touches another leg, or other objects, that cost makes robot avoid smashing feet into itself 77 | foot_ground_object_names = set(["floor"]) # to distinguish ground and other objects 78 | joints_at_limit_cost = -0.1 # discourage stuck joints 79 | 80 | def step(self, a): 81 | if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions 82 | self.robot.apply_action(a) 83 | self.scene.global_step() 84 | 85 | state = self.robot.calc_state() # also calculates self.joints_at_limit 86 | 87 | self._alive = float(self.robot.alive_bonus(state[0]+self.robot.initial_z, self.robot.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch 88 | done = self._isDone() 89 | if not np.isfinite(state).all(): 90 | print("~INF~", state) 91 | done = True 92 | 93 | potential_old = self.potential 94 | self.potential = self.robot.calc_potential() 95 | progress = float(self.potential - potential_old) 96 | 97 | feet_collision_cost = 0.0 98 | for i,f in enumerate(self.robot.feet): # TODO: Maybe calculating feet contacts could be done within the robot code 99 | contact_ids = set((x[2], x[4]) for x in f.contact_list()) 100 | #print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) ) 101 | if (self.ground_ids & contact_ids): 102 | #see Issue 63: https://github.com/openai/roboschool/issues/63 103 | #feet_collision_cost += self.foot_collision_cost 104 | self.robot.feet_contact[i] = 1.0 105 | else: 106 | self.robot.feet_contact[i] = 0.0 107 | 108 | 109 | electricity_cost = self.electricity_cost * float(np.abs(a*self.robot.joint_speeds).mean()) # let's assume we have DC motor with controller, and reverse current braking 110 | electricity_cost += self.stall_torque_cost * float(np.square(a).mean()) 111 | 112 | joints_at_limit_cost = float(self.joints_at_limit_cost * self.robot.joints_at_limit) 113 | debugmode=0 114 | if(debugmode): 115 | print("alive=") 116 | print(self._alive) 117 | print("progress") 118 | print(progress) 119 | print("electricity_cost") 120 | print(electricity_cost) 121 | print("joints_at_limit_cost") 122 | print(joints_at_limit_cost) 123 | print("feet_collision_cost") 124 | print(feet_collision_cost) 125 | 126 | self.rewards = [ 127 | self._alive, 128 | progress, 129 | electricity_cost, 130 | joints_at_limit_cost, 131 | feet_collision_cost 132 | ] 133 | if (debugmode): 134 | print("rewards=") 135 | print(self.rewards) 136 | print("sum rewards") 137 | print(sum(self.rewards)) 138 | self.HUD(state, a, done) 139 | self.reward += sum(self.rewards) 140 | 141 | return state, sum(self.rewards), bool(done), {} 142 | 143 | # def camera_adjust(self): 144 | # x, y, z = self.robot.body_xyz 145 | # self.camera._p = self._p 146 | # self.camera_x = 0.98*self.camera_x + (1-0.98)*x 147 | # self.camera.move_and_look_at(self.camera_x, y-1.0, 1.4, x, y, 1.0) 148 | def camera_adjust(self): 149 | if self._p is None : 150 | return 151 | 
self.camera._p = self._p 152 | x, y, z = self.robot.body_xyz 153 | if self.camera_x is not None: 154 | self.camera_x = x # 0.98*self.camera_x + (1-0.98)*x 155 | else: 156 | self.camera_x = x 157 | # self.camera.move_and_look_at(self.camera_x, y-2.0, 1.4, x, y, 1.0) 158 | lookat = [self.camera_x, y, z] 159 | distance = self._param_camera_distance 160 | yaw = 10 161 | self._p.resetDebugVisualizerCamera(distance, yaw, -20, lookat) 162 | 163 | def render_camera_image(self, pixelWidthHeight = None): 164 | if pixelWidthHeight is not None or self._projectM is None: 165 | if self._projectM is None: 166 | self._pixelWidth = self._param_init_camera_width 167 | self._pixelHeight = self._param_init_camera_height 168 | else: 169 | self._pixelWidth = pixelWidthHeight[0] 170 | self._pixelHeight = pixelWidthHeight[1] 171 | nearPlane = 0.01 172 | farPlane = 10 173 | aspect = self._pixelWidth / self._pixelHeight 174 | fov = 60 175 | self._projectM = self._p.computeProjectionMatrixFOV(fov, aspect, 176 | nearPlane, farPlane) 177 | 178 | x, y, z = self.robot.robot_body.pose().xyz() 179 | lookat = [x, y, 0.5] 180 | distance = 2.0 181 | yaw = -20 182 | viewM = self._p.computeViewMatrixFromYawPitchRoll( 183 | cameraTargetPosition=lookat, 184 | distance=distance, 185 | yaw=10., 186 | pitch=yaw, 187 | roll=0.0, 188 | upAxisIndex=2) 189 | 190 | # img_arr = pybullet.getCameraImage(self._pixelWidth, self._pixelHeight, viewM, self._projectM, shadow=1,lightDirection=[1,1,1],renderer=pybullet.ER_BULLET_HARDWARE_OPENGL) 191 | img_arr = pybullet.getCameraImage(self._pixelWidth, self._pixelHeight, viewM, self._projectM, shadow=False, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL, flags=pybullet.ER_NO_SEGMENTATION_MASK) 192 | 193 | w=img_arr[0] #width of the image, in pixels 194 | h=img_arr[1] #height of the image, in pixels 195 | rgb=img_arr[2] #color data RGB 196 | 197 | image = np.reshape(rgb, (h, w, 4)) #Red, Green, Blue, Alpha 198 | image = image * (1./255.) 
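# after scaling pixel values to [0, 1], keep only the RGB planes and drop the alpha channel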
199 | image = image[:,:,0:3] 200 | return image 201 | 202 | class HalfCheetahBulletEnv(WalkerBaseBulletEnv): 203 | def __init__(self, render=False, design = None): 204 | self.robot = HalfCheetah(design) 205 | WalkerBaseBulletEnv.__init__(self, self.robot, render) 206 | self.observation_space = spaces.Box(-np.inf, np.inf, shape=[17], dtype=np.float32) 207 | 208 | def _isDone(self): 209 | return False 210 | 211 | def disconnect(self): 212 | self._p.disconnect() 213 | 214 | def step(self, a): 215 | if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions 216 | self.robot.apply_action(a) 217 | self.scene.global_step() 218 | 219 | state = self.robot.calc_state() # also calculates self.joints_at_limit 220 | 221 | done = self._isDone() 222 | if not np.isfinite(state).all(): 223 | print("~INF~", state) 224 | done = 0 225 | reward = max(state[-5]/10.0, 0.0) 226 | 227 | return state, reward, bool(done), {} 228 | 229 | def reset_design(self, design): 230 | self.stateId = -1 231 | self.scene = None 232 | self.robot.reset_design(self._p, design) 233 | -------------------------------------------------------------------------------- /Environments/pybullet_evo/half_cheetah.xml: -------------------------------------------------------------------------------- 1 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 83 | -------------------------------------------------------------------------------- /Environments/pybullet_evo/robot_bases.py: -------------------------------------------------------------------------------- 1 | import pybullet 2 | import gym, gym.spaces, gym.utils 3 | import numpy as np 4 | import os, inspect 5 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 6 | parentdir = os.path.dirname(currentdir) 7 | os.sys.path.insert(0,parentdir) 8 | import pybullet_data 9 | 10 | 11 | class XmlBasedRobot: 12 | """ 13 | Base class for mujoco .xml based agents. 
14 | """ 15 | 16 | self_collision = True 17 | def __init__(self, robot_name, action_dim, obs_dim, self_collision): 18 | self.parts = None 19 | self.objects = [] 20 | self.jdict = None 21 | self.ordered_joints = None 22 | self.robot_body = None 23 | 24 | high = np.ones([action_dim]) 25 | self.action_space = gym.spaces.Box(-high, high) 26 | high = np.inf * np.ones([obs_dim]) 27 | self.observation_space = gym.spaces.Box(-high, high) 28 | 29 | #self.model_xml = model_xml 30 | self.robot_name = robot_name 31 | self.self_collision = self_collision 32 | 33 | def addToScene(self, bullet_client, bodies): 34 | self._p = bullet_client 35 | 36 | if self.parts is not None: 37 | parts = self.parts 38 | else: 39 | parts = {} 40 | 41 | if self.jdict is not None: 42 | joints = self.jdict 43 | else: 44 | joints = {} 45 | 46 | if self.ordered_joints is not None: 47 | ordered_joints = self.ordered_joints 48 | else: 49 | ordered_joints = [] 50 | 51 | if np.isscalar(bodies): # streamline the case where bodies is actually just one body 52 | bodies = [bodies] 53 | 54 | dump = 0 55 | for i in range(len(bodies)): 56 | if self._p.getNumJoints(bodies[i]) == 0: 57 | part_name, robot_name = self._p.getBodyInfo(bodies[i]) 58 | self.robot_name = robot_name.decode("utf8") 59 | part_name = part_name.decode("utf8") 60 | parts[part_name] = BodyPart(self._p, part_name, bodies, i, -1) 61 | for j in range(self._p.getNumJoints(bodies[i])): 62 | self._p.setJointMotorControl2(bodies[i],j,pybullet.POSITION_CONTROL,positionGain=0.1,velocityGain=0.1,force=0) 63 | jointInfo = self._p.getJointInfo(bodies[i], j) 64 | joint_name=jointInfo[1] 65 | part_name=jointInfo[12] 66 | 67 | joint_name = joint_name.decode("utf8") 68 | part_name = part_name.decode("utf8") 69 | 70 | if dump: print("ROBOT PART '%s'" % part_name) 71 | if dump: print("ROBOT JOINT '%s'" % joint_name) # limits = %+0.2f..%+0.2f effort=%0.3f speed=%0.3f" % ((joint_name,) + j.limits()) ) 72 | 73 | parts[part_name] = BodyPart(self._p, part_name, bodies, i, j) 74 | 75 | if part_name == self.robot_name: 76 | self.robot_body = parts[part_name] 77 | 78 | if i == 0 and j == 0 and self.robot_body is None: # if nothing else works, we take this as robot_body 79 | parts[self.robot_name] = BodyPart(self._p, self.robot_name, bodies, 0, -1) 80 | self.robot_body = parts[self.robot_name] 81 | 82 | if joint_name[:6] == "ignore": 83 | Joint(self._p, joint_name, bodies, i, j).disable_motor() 84 | continue 85 | 86 | if joint_name[:8] != "jointfix": 87 | joints[joint_name] = Joint(self._p, joint_name, bodies, i, j) 88 | ordered_joints.append(joints[joint_name]) 89 | 90 | joints[joint_name].power_coef = 100.0 91 | 92 | # TODO: Maybe we need this 93 | # joints[joint_name].power_coef, joints[joint_name].max_velocity = joints[joint_name].limits()[2:4] 94 | # self.ordered_joints.append(joints[joint_name]) 95 | # self.jdict[joint_name] = joints[joint_name] 96 | 97 | return parts, joints, ordered_joints, self.robot_body 98 | 99 | def reset_pose(self, position, orientation): 100 | self.parts[self.robot_name].reset_pose(position, orientation) 101 | 102 | class MJCFBasedRobot(XmlBasedRobot): 103 | """ 104 | Base class for mujoco .xml based agents. 
105 | """ 106 | 107 | def __init__(self, model_xml, robot_name, action_dim, obs_dim, self_collision=True): 108 | XmlBasedRobot.__init__(self, robot_name, action_dim, obs_dim, self_collision) 109 | self.model_xml = model_xml 110 | self.doneLoading=0 111 | def reset(self, bullet_client): 112 | # print("Load") 113 | # print(self.doneLoading) 114 | # print(self.ordered_joints) 115 | self._p = bullet_client 116 | #print("Created bullet_client with id=", self._p._client) 117 | if (self.doneLoading==0): 118 | self.ordered_joints = [] 119 | self.doneLoading=1 120 | file_path = self.model_xml #os.path.join(currentdir, self.model_xml) 121 | if self.self_collision: 122 | self.objects = self._p.loadMJCF(file_path, flags=pybullet.URDF_USE_SELF_COLLISION|pybullet.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS) 123 | self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(self._p, self.objects ) 124 | else: 125 | self.objects = self._p.loadMJCF(file_path) 126 | self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(self._p, self.objects) 127 | self.robot_specific_reset(self._p) 128 | 129 | s = self.calc_state() # optimization: calc_state() can calculate something in self.* for calc_potential() to use 130 | 131 | return s 132 | 133 | def calc_potential(self): 134 | return 0 135 | 136 | 137 | class URDFBasedRobot(XmlBasedRobot): 138 | """ 139 | Base class for URDF .xml based robots. 140 | """ 141 | 142 | def __init__(self, model_urdf, robot_name, action_dim, obs_dim, basePosition=[0, 0, 0], baseOrientation=[0, 0, 0, 1], fixed_base=False, self_collision=False): 143 | XmlBasedRobot.__init__(self, robot_name, action_dim, obs_dim, self_collision) 144 | 145 | self.model_urdf = model_urdf 146 | self.basePosition = basePosition 147 | self.baseOrientation = baseOrientation 148 | self.fixed_base = fixed_base 149 | 150 | def reset(self, bullet_client): 151 | self._p = bullet_client 152 | self.ordered_joints = [] 153 | 154 | print(os.path.join(os.path.dirname(__file__), "data", self.model_urdf)) 155 | 156 | if self.self_collision: 157 | self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(self._p, 158 | self._p.loadURDF(os.path.join(pybullet_data.getDataPath(), self.model_urdf), 159 | basePosition=self.basePosition, 160 | baseOrientation=self.baseOrientation, 161 | useFixedBase=self.fixed_base, 162 | flags=pybullet.URDF_USE_SELF_COLLISION)) 163 | else: 164 | self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(self._p, 165 | self._p.loadURDF(os.path.join(pybullet_data.getDataPath(), self.model_urdf), 166 | basePosition=self.basePosition, 167 | baseOrientation=self.baseOrientation, 168 | useFixedBase=self.fixed_base)) 169 | 170 | self.robot_specific_reset(self._p) 171 | 172 | s = self.calc_state() # optimization: calc_state() can calculate something in self.* for calc_potential() to use 173 | self.potential = self.calc_potential() 174 | 175 | return s 176 | 177 | def calc_potential(self): 178 | return 0 179 | 180 | 181 | class SDFBasedRobot(XmlBasedRobot): 182 | """ 183 | Base class for SDF robots in a Scene. 
184 | """ 185 | 186 | def __init__(self, model_sdf, robot_name, action_dim, obs_dim, basePosition=[0, 0, 0], baseOrientation=[0, 0, 0, 1], fixed_base=False, self_collision=False): 187 | XmlBasedRobot.__init__(self, robot_name, action_dim, obs_dim, self_collision) 188 | 189 | self.model_sdf = model_sdf 190 | self.fixed_base = fixed_base 191 | 192 | def reset(self, bullet_client): 193 | self._p = bullet_client 194 | 195 | self.ordered_joints = [] 196 | 197 | self.parts, self.jdict, self.ordered_joints, self.robot_body = self.addToScene(self._p, # TODO: Not sure if this works, try it with kuka 198 | self._p.loadSDF(os.path.join("models_robot", self.model_sdf))) 199 | 200 | self.robot_specific_reset(self._p) 201 | 202 | s = self.calc_state() # optimization: calc_state() can calculate something in self.* for calc_potential() to use 203 | self.potential = self.calc_potential() 204 | 205 | return s 206 | 207 | def calc_potential(self): 208 | return 0 209 | 210 | 211 | class Pose_Helper: # dummy class to comply to original interface 212 | def __init__(self, body_part): 213 | self.body_part = body_part 214 | 215 | def xyz(self): 216 | return self.body_part.current_position() 217 | 218 | def rpy(self): 219 | return pybullet.getEulerFromQuaternion(self.body_part.current_orientation()) 220 | 221 | def orientation(self): 222 | return self.body_part.current_orientation() 223 | 224 | class BodyPart: 225 | def __init__(self, bullet_client, body_name, bodies, bodyIndex, bodyPartIndex): 226 | self.bodies = bodies 227 | self._p = bullet_client 228 | self.bodyIndex = bodyIndex 229 | self.bodyPartIndex = bodyPartIndex 230 | self.initialPosition = self.current_position() 231 | self.initialOrientation = self.current_orientation() 232 | self.bp_pose = Pose_Helper(self) 233 | 234 | def state_fields_of_pose_of(self, body_id, link_id=-1): # a method you will most probably need a lot to get pose and orientation 235 | if link_id == -1: 236 | (x, y, z), (a, b, c, d) = self._p.getBasePositionAndOrientation(body_id) 237 | else: 238 | (x, y, z), (a, b, c, d), _, _, _, _ = self._p.getLinkState(body_id, link_id) 239 | return np.array([x, y, z, a, b, c, d]) 240 | 241 | def get_position(self): return self.current_position() 242 | 243 | def get_pose(self): 244 | return self.state_fields_of_pose_of(self.bodies[self.bodyIndex], self.bodyPartIndex) 245 | 246 | def speed(self): 247 | if self.bodyPartIndex == -1: 248 | (vx, vy, vz), _ = self._p.getBaseVelocity(self.bodies[self.bodyIndex]) 249 | else: 250 | (x,y,z), (a,b,c,d), _,_,_,_, (vx, vy, vz), (vr,vp,vy) = self._p.getLinkState(self.bodies[self.bodyIndex], self.bodyPartIndex, computeLinkVelocity=1) 251 | return np.array([vx, vy, vz]) 252 | 253 | def speed_angular(self): 254 | if self.bodyPartIndex == -1: 255 | (vx, vy, vz), (vr, vp, vy) = self._p.getBaseVelocity(self.bodies[self.bodyIndex]) 256 | else: 257 | (x,y,z), (a,b,c,d), _,_,_,_, (vx, vy, vz), (vr,vp,vy) = self._p.getLinkState(self.bodies[self.bodyIndex], self.bodyPartIndex, computeLinkVelocity=1) 258 | return np.array([vr, vp, vy]) 259 | 260 | def current_position(self): 261 | return self.get_pose()[:3] 262 | 263 | def current_orientation(self): 264 | return self.get_pose()[3:] 265 | 266 | def get_orientation(self): 267 | return self.current_orientation() 268 | 269 | def reset_position(self, position): 270 | self._p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, self.get_orientation()) 271 | 272 | def reset_orientation(self, orientation): 273 | 
self._p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], self.get_position(), orientation) 274 | 275 | def reset_velocity(self, linearVelocity=[0,0,0], angularVelocity =[0,0,0]): 276 | self._p.resetBaseVelocity(self.bodies[self.bodyIndex], linearVelocity, angularVelocity) 277 | 278 | def reset_pose(self, position, orientation): 279 | self._p.resetBasePositionAndOrientation(self.bodies[self.bodyIndex], position, orientation) 280 | 281 | def pose(self): 282 | return self.bp_pose 283 | 284 | def contact_list(self): 285 | return self._p.getContactPoints(self.bodies[self.bodyIndex], -1, self.bodyPartIndex, -1) 286 | 287 | 288 | class Joint: 289 | def __init__(self, bullet_client, joint_name, bodies, bodyIndex, jointIndex): 290 | self.bodies = bodies 291 | self._p = bullet_client 292 | self.bodyIndex = bodyIndex 293 | self.jointIndex = jointIndex 294 | self.joint_name = joint_name 295 | 296 | jointInfo = self._p.getJointInfo(self.bodies[self.bodyIndex], self.jointIndex) 297 | self.lowerLimit = jointInfo[8] 298 | self.upperLimit = jointInfo[9] 299 | 300 | self.power_coeff = 0 301 | 302 | def set_state(self, x, vx): 303 | self._p.resetJointState(self.bodies[self.bodyIndex], self.jointIndex, x, vx) 304 | 305 | def current_position(self): # just some synonyme method 306 | return self.get_state() 307 | 308 | def current_relative_position(self): 309 | pos, vel = self.get_state() 310 | pos_mid = 0.5 * (self.lowerLimit + self.upperLimit); 311 | return ( 312 | 2 * (pos - pos_mid) / (self.upperLimit - self.lowerLimit), 313 | 0.1 * vel 314 | ) 315 | 316 | def get_state(self): 317 | x, vx,_,_ = self._p.getJointState(self.bodies[self.bodyIndex],self.jointIndex) 318 | return x, vx 319 | 320 | def get_position(self): 321 | x, _ = self.get_state() 322 | return x 323 | 324 | def get_orientation(self): 325 | _,r = self.get_state() 326 | return r 327 | 328 | def get_velocity(self): 329 | _, vx = self.get_state() 330 | return vx 331 | 332 | def set_position(self, position): 333 | self._p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,pybullet.POSITION_CONTROL, targetPosition=position) 334 | 335 | def set_velocity(self, velocity): 336 | self._p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,pybullet.VELOCITY_CONTROL, targetVelocity=velocity) 337 | 338 | def set_motor_torque(self, torque): # just some synonyme method 339 | self.set_torque(torque) 340 | 341 | def set_torque(self, torque): 342 | self._p.setJointMotorControl2(bodyIndex=self.bodies[self.bodyIndex], jointIndex=self.jointIndex, controlMode=pybullet.TORQUE_CONTROL, force=torque) #, positionGain=0.1, velocityGain=0.1) 343 | 344 | def reset_current_position(self, position, velocity): # just some synonyme method 345 | self.reset_position(position, velocity) 346 | 347 | def reset_position(self, position, velocity): 348 | self._p.resetJointState(self.bodies[self.bodyIndex],self.jointIndex,targetValue=position, targetVelocity=velocity) 349 | self.disable_motor() 350 | 351 | def disable_motor(self): 352 | self._p.setJointMotorControl2(self.bodies[self.bodyIndex],self.jointIndex,controlMode=pybullet.POSITION_CONTROL, targetPosition=0, targetVelocity=0, positionGain=0.1, velocityGain=0.1, force=0) 353 | -------------------------------------------------------------------------------- /Environments/pybullet_evo/robot_locomotors.py: -------------------------------------------------------------------------------- 1 | from .robot_bases import XmlBasedRobot, MJCFBasedRobot, URDFBasedRobot 2 | import numpy as np 3 | import 
pybullet 4 | import os, inspect 5 | import pybullet_data 6 | from .robot_bases import BodyPart 7 | import tempfile 8 | import atexit 9 | import xmltodict 10 | 11 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 12 | 13 | def cleanup_func_for_tmp(filepath): 14 | os.remove(filepath) 15 | 16 | class WalkerBase(MJCFBasedRobot): 17 | def __init__(self, fn, robot_name, action_dim, obs_dim, power, self_collision=True): 18 | MJCFBasedRobot.__init__(self, fn, robot_name, action_dim, obs_dim, self_collision=True) 19 | self.power = power 20 | self.camera_x = 0 21 | self.start_pos_x, self.start_pos_y, self.start_pos_z = 0, 0, 0 22 | self.walk_target_x = 1e3 # kilometer away 23 | self.walk_target_y = 0 24 | self.body_xyz=[0,0,0] 25 | 26 | def robot_specific_reset(self, bullet_client): 27 | self._p = bullet_client 28 | self._reset_position = [] 29 | for j in self.ordered_joints: 30 | pos = self.np_random.uniform(low=-0.5, high=0.5) 31 | j.reset_current_position(pos, 0) 32 | self._reset_position.append(pos) 33 | 34 | self.feet = [self.parts[f] for f in self.foot_list] 35 | self.feet_contact = np.array([0.0 for f in self.foot_list], dtype=np.float32) 36 | self.scene.actor_introduce(self) 37 | self.initial_z = None 38 | 39 | def reset_position(self): 40 | for j, pos in zip(self.ordered_joints, self._reset_position): 41 | j.set_velocity(0) 42 | # j.disable_motor() 43 | 44 | def reset_position_final(self): 45 | for j, pos in zip(self.ordered_joints, self._reset_position): 46 | # j.reset_current_position(pos, 0) 47 | j.disable_motor() 48 | 49 | 50 | def apply_action(self, a): 51 | assert (np.isfinite(a).all()) 52 | for n, j in enumerate(self.ordered_joints): 53 | j.set_motor_torque(self.power * j.power_coef * float(np.clip(a[n], -1, +1))) 54 | 55 | def calc_state(self): 56 | j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten() 57 | # even elements [0::2] position, scaled to -1..+1 between limits 58 | # odd elements [1::2] angular speed, scaled to show -1..+1 59 | self.joint_speeds = j[1::2] 60 | self.joints_at_limit = np.count_nonzero(np.abs(j[0::2]) > 0.99) 61 | 62 | body_pose = self.robot_body.pose() 63 | parts_xyz = np.array([p.pose().xyz() for p in self.parts.values()]).flatten() 64 | self.body_xyz = ( 65 | parts_xyz[0::3].mean(), parts_xyz[1::3].mean(), body_pose.xyz()[2]) # torso z is more informative than mean z 66 | self.body_rpy = body_pose.rpy() 67 | z = self.body_xyz[2] 68 | if self.initial_z == None: 69 | self.initial_z = z 70 | r, p, yaw = self.body_rpy 71 | self.walk_target_theta = np.arctan2(self.walk_target_y - self.body_xyz[1], 72 | self.walk_target_x - self.body_xyz[0]) 73 | self.walk_target_dist = np.linalg.norm( 74 | [self.walk_target_y - self.body_xyz[1], self.walk_target_x - self.body_xyz[0]]) 75 | angle_to_target = self.walk_target_theta - yaw 76 | 77 | rot_speed = np.array( 78 | [[np.cos(-yaw), -np.sin(-yaw), 0], 79 | [np.sin(-yaw), np.cos(-yaw), 0], 80 | [ 0, 0, 1]] 81 | ) 82 | vx, vy, vz = np.dot(rot_speed, self.robot_body.speed()) # rotate speed back to body point of view 83 | 84 | more = np.array([ z-self.initial_z, 85 | np.sin(angle_to_target), np.cos(angle_to_target), 86 | 0.3* vx , 0.3* vy , 0.3* vz , # 0.3 is just scaling typical speed into -1..+1, no physical sense here 87 | r, p], dtype=np.float32) 88 | return np.clip( np.concatenate([more] + [j] + [self.feet_contact]), -5, +5) 89 | 90 | def calc_potential(self): 91 | # progress in potential field is speed*dt, typical speed is about 2-3 
meter per second, this potential will change 2-3 per frame (not per second), 92 | # all rewards have rew/frame units and close to 1.0 93 | debugmode=0 94 | if (debugmode): 95 | print("calc_potential: self.walk_target_dist") 96 | print(self.walk_target_dist) 97 | print("self.scene.dt") 98 | print(self.scene.dt) 99 | print("self.scene.frame_skip") 100 | print(self.scene.frame_skip) 101 | print("self.scene.timestep") 102 | print(self.scene.timestep) 103 | return - self.walk_target_dist / self.scene.dt 104 | 105 | class HalfCheetah(WalkerBase): 106 | foot_list = ["ffoot", "fshin", "fthigh", "bfoot", "bshin", "bthigh"] # track these contacts with ground 107 | 108 | def __init__(self, design = None): 109 | self._adapted_xml_file = tempfile.NamedTemporaryFile(delete=False, prefix='halfcheetah_', suffix='.xml') 110 | self._adapted_xml_filepath = self._adapted_xml_file.name 111 | file = self._adapted_xml_filepath 112 | self._adapted_xml_file.close() 113 | atexit.register(cleanup_func_for_tmp, self._adapted_xml_filepath) 114 | self.adapt_xml(self._adapted_xml_filepath, design) 115 | #WalkerBase.__init__(self, self._adapted_xml_filepath, "torso", action_dim=6, obs_dim=26, power=0.90) 116 | WalkerBase.__init__(self, file, "torso", action_dim=6, obs_dim=26, power=0.90) 117 | 118 | def adapt_xml(self, file, design = None): 119 | with open(os.path.join(currentdir, 'half_cheetah.xml'), 'r') as fd: 120 | xml_string = fd.read() 121 | height = 1.0 122 | bth_r = 1.0 123 | bsh_r = 1.0 124 | bfo_r = 1.0 125 | fth_r = 1.0 126 | fsh_r = 1.0 127 | ffo_r = 1.0 128 | if design is None: 129 | bth_r, bsh_r, bfo_r, fth_r, fsh_r, ffo_r = np.random.uniform(low=0.5, high=1.5, size=6) 130 | else: 131 | bth_r, bsh_r, bfo_r, fth_r, fsh_r, ffo_r = design 132 | height = max(.145 * bth_r + .15 * bsh_r + .094 * bfo_r, .133 * fth_r + .106 * fsh_r + .07 * ffo_r) 133 | height *= 2.0 + 0.01 134 | 135 | xml_dict = xmltodict.parse(xml_string) 136 | xml_dict['mujoco']['worldbody']['body']['@pos'] = "0 0 {}".format(height) 137 | 138 | ### Btigh 139 | 140 | xml_dict['mujoco']['worldbody']['body']['body'][0]['geom']['@pos'] = '.1 0 -.13' 141 | xml_dict['mujoco']['worldbody']['body']['body'][0]['geom']['@pos'] = '{} 0 {}'.format(.1 * bth_r, -.13 * bth_r) 142 | 143 | xml_dict['mujoco']['worldbody']['body']['body'][0]['geom']['@size'] = '0.046 .145' 144 | xml_dict['mujoco']['worldbody']['body']['body'][0]['geom']['@size'] = '0.046 {}'.format(.145 * bth_r) 145 | 146 | xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['@pos'] = '.16 0 -.25' 147 | xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['@pos'] = '{} 0 {}'.format(.16 * bth_r, -.25 * bth_r) 148 | 149 | ### bshin 150 | 151 | xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['geom']['@pos'] = '-.14 0 -.07' 152 | xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['geom']['@pos'] = '{} 0 {}'.format(-.14 * bsh_r, -.07 * bsh_r) 153 | 154 | xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['geom']['@size'] = '0.046 .15' 155 | xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['geom']['@size'] = '0.046 {}'.format(.15 * bsh_r) 156 | 157 | xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['body']['@pos'] = '-.28 0 -.14' 158 | xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['body']['@pos'] = '{} 0 {}'.format(-.28 * bsh_r, -.14 * bsh_r) 159 | 160 | ### bfoot 161 | 162 | xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['body']['geom']['@pos'] = '.03 0 -.097' 163 | 
xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['body']['geom']['@pos'] = '{} 0 {}'.format(.03 * bfo_r, -.097 * bfo_r) 164 | 165 | xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['body']['geom']['@size'] = '0.046 .094' 166 | xml_dict['mujoco']['worldbody']['body']['body'][0]['body']['body']['geom']['@size'] = '0.046 {}'.format(.094 * bfo_r) 167 | 168 | ### fthigh 169 | 170 | xml_dict['mujoco']['worldbody']['body']['body'][1]['geom']['@pos'] = '-.07 0 -.12' 171 | xml_dict['mujoco']['worldbody']['body']['body'][1]['geom']['@pos'] = '{} 0 {}'.format(-.07 * fth_r, -.12 * fth_r) 172 | 173 | xml_dict['mujoco']['worldbody']['body']['body'][1]['geom']['@size'] = '0.046 .133' 174 | xml_dict['mujoco']['worldbody']['body']['body'][1]['geom']['@size'] = '0.046 {}'.format(.133 * fth_r) 175 | 176 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['@pos'] = '-.14 0 -.24' 177 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['@pos'] = '{} 0 {}'.format(-.14 *fth_r, -.24 * fth_r) 178 | 179 | ### fshin 180 | 181 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['geom']['@pos'] = '.065 0 -.09' 182 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['geom']['@pos'] = '{} 0 {}'.format(.065 * fsh_r, -.09 * fsh_r) 183 | 184 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['geom']['@size'] = '0.046 .106' 185 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['geom']['@size'] = '0.046 {}'.format(.106 * fsh_r) 186 | 187 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['body']['@pos'] = '.13 0 -.18' 188 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['body']['@pos'] = '{} 0 {}'.format(.13 * fsh_r, -.18 * fsh_r) 189 | 190 | ### ffoot 191 | 192 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['body']['geom']['@pos'] = '.045 0 -.07' 193 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['body']['geom']['@pos'] = '{} 0 {}'.format(.045 * ffo_r, -.07 * ffo_r) 194 | 195 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['body']['geom']['@size'] = '0.046 .07' 196 | xml_dict['mujoco']['worldbody']['body']['body'][1]['body']['body']['geom']['@size'] = '0.046 {}'.format(.07 * ffo_r) 197 | 198 | xml_string = xmltodict.unparse(xml_dict, pretty=True) 199 | with open(file, 'w') as fd: 200 | fd.write(xml_string) 201 | 202 | def alive_bonus(self, z, pitch): 203 | # Use contact other than feet to terminate episode: due to a lot of strange walks using knees 204 | return 0 #+1 if np.abs(pitch) < 1.0 and not self.feet_contact[1] and not self.feet_contact[2] and not self.feet_contact[4] and not self.feet_contact[5] else -1 205 | 206 | def robot_specific_reset(self, bullet_client): 207 | WalkerBase.robot_specific_reset(self, bullet_client) 208 | self.jdict["bthigh"].power_coef = 120.0 209 | self.jdict["bshin"].power_coef = 90.0 210 | self.jdict["bfoot"].power_coef = 60.0 211 | self.jdict["fthigh"].power_coef = 90.0 #140? 
212 | self.jdict["fshin"].power_coef = 60.0 213 | self.jdict["ffoot"].power_coef = 30.0 214 | body_pose = self.robot_body.pose() 215 | x, y, z = body_pose.xyz() 216 | self._initial_z = z 217 | 218 | def reset_design(self, bullet_client, design): 219 | self._adapted_xml_file = tempfile.NamedTemporaryFile(delete=False, prefix='halfcheetah_', suffix='.xml') 220 | self._adapted_xml_filepath = self._adapted_xml_file.name 221 | file = self._adapted_xml_filepath 222 | self._adapted_xml_file.close() 223 | atexit.register(cleanup_func_for_tmp, self._adapted_xml_filepath) 224 | 225 | self.adapt_xml(file, design) 226 | self.model_xml = file 227 | 228 | self.doneLoading = 0 229 | self.parts = None 230 | self.objects = [] 231 | self.jdict = None 232 | self.ordered_joints = None 233 | self.robot_body = None 234 | self.robot_name = "torso" 235 | bullet_client.resetSimulation() 236 | self.reset(bullet_client) 237 | # self.reset(self._p) 238 | 239 | def calc_potential(self): 240 | return 0 241 | 242 | def calc_state(self): 243 | j = np.array([j.current_relative_position() for j in self.ordered_joints], dtype=np.float32).flatten() 244 | # even elements [0::2] position, scaled to -1..+1 between limits 245 | # odd elements [1::2] angular speed, scaled to show -1..+1 246 | self.joint_pos = j[0::2] 247 | self.joint_speeds = j[1::2] 248 | body_pose = self.robot_body.pose() 249 | x, y, z = body_pose.xyz() 250 | z = z - self._initial_z 251 | r, p, yaw = self.body_rpy = body_pose.rpy() 252 | xv, yv, zv = self.robot_body.speed() 253 | vr, vp, vy = self.robot_body.speed_angular() 254 | state = np.array([xv, vp, zv, p, z]) 255 | state = np.append(self.joint_speeds, state) 256 | state = np.append(self.joint_pos, state) 257 | return state 258 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Kevin Sebastian Luck 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fast Evolution through Actor-Critic Reinforcement Learning 2 | This is the official repository providing a refactored implementation of the data-driven 3 | design optimization method presented in the paper [**Data-efficient Co-Adaptation of Morphology and Behaviour with Deep Reinforcement Learning**](https://research.fb.com/publications/data-efficient-co-adaptation-of-morphology-and-behaviour-with-deep-reinforcement-learning/). 4 | This paper was presented on the Conference on Robot Learning in 2019. 5 | The website for this project can be found [here](https://sites.google.com/view/drl-coadaptation/home). 6 | 7 | At the moment, the repository contains a basic implementation of the proposed algorithm and its baseline. We use particle swarm optimization on the Q-function, which is used as a surrogate function predicting the performance of design candidates and, thus, avoiding the necessity to simulate/evaluate design candidates. The baseline uses also particle swarm optimization but evaluates design candidates in simulation instead. 8 | 9 | The current environment provided is Half-Cheetah, using pybullet, for which we have to learn effective movement strategies and the optimal leg lengths, maximizing the performance of the agent. 10 | 11 | Additional methods and environments which are shown in the paper will be added over time and the structure of the repository might change in the future. 12 | 13 | ## Citation 14 | If you use this code in your research, please cite 15 | ``` 16 | @inproceedings{luck2019coadapt, 17 | title={Data-efficient Co-Adaptation of Morphology and Behaviour with Deep Reinforcement Learning}, 18 | author={Luck, Kevin Sebastian and Ben Amor, Heni and Calandra, Roberto}, 19 | booktitle={Conference on Robot Learning}, 20 | year={2019} 21 | } 22 | ``` 23 | 24 | ## Acknowledgements of Previous Work 25 | This project would have been harder to implement without the great work of 26 | the developers behind rlkit and pybullet. 27 | 28 | The reinforcement learning loop makes extensive use of rlkit, a framework developed 29 | and maintained by Vitchyr Pong. You find this repository [here](https://github.com/vitchyr/rlkit). 30 | We made slight adaptations to the Soft-Actor-Critic algorithm used in this repository. 31 | 32 | Tasks were simulated in [PyBullet](https://pybullet.org/wordpress/), the 33 | repository can be found [here](https://github.com/bulletphysics/bullet3/tree/master/examples/pybullet). 34 | Adaptations were made to the files found in pybullet_evo to enable the dynamic adaptation 35 | of design parameters during the training process. 36 | 37 | ## Installation 38 | 39 | Make sure that PyTorch is installed. You find more information here: https://pytorch.org/ 40 | 41 | First, clone this repository to your local computer as usual. 42 | Then, install the required packages via pip by executing `pip3 install -r requirements.txt`. 43 | 44 | The current version of Coadapt is using now the latest version of rlkit by [Vitchyr Pong](https://github.com/vitchyr/rlkit). 45 | 46 | Clone the rlkit with 47 | ```bash 48 | git clone https://github.com/vitchyr/rlkit.git 49 | ``` 50 | Now, set in your terminal the environment variable PYTHONPATH with 51 | ```bash 52 | export PYTHONPATH=/path/to/rlkit/ 53 | ``` 54 | where the folder `/path/to/rlkit` contains the folder `rlkit`. 
This enables us 55 | to import rlkit with `import rlkit`. 56 | Alternatively, follow the installations guidelines you can find in the rlkit repository. 57 | 58 | You may have to set the environmental variable every time you open a new terminal. 59 | 60 | ## Starting experiments 61 | 62 | After setting the environmental variable and installing the packages you can 63 | proceed to run the experiments. 64 | There are two experimental configurations already set up for you in `experiment_configs.py`. 65 | You can execute them with 66 | ```bash 67 | python3 main.py sac_pso_batch 68 | ``` 69 | and 70 | ```bash 71 | python3 main.py sac_pso_sim 72 | ``` 73 | 74 | You may change the configs or add new ones. Make sure to add new configurations to 75 | the `config_dict` in `experiment_configs.py`. 76 | 77 | ## Data logging 78 | If you execute these commands, they will automatically create directories in which 79 | the performance and achieved rewards will be stored. Each experiment creates 80 | a specific folder with the current date/time and a random string as name. 81 | You can find in this folder a copy of the config you executed and one csv file 82 | for each design on which the reinforcement learning algorithm was executed. 83 | Each csv file contains three rows: The type of the design (either 'Initial', 'Optimized' or 'Random'); 84 | The design vector; And the subsequent, cumulative rewards for each episode/trial. 85 | 86 | The file `ADDVIZFILE` provieds a basic jupyter notebook to visualize the collected 87 | data. 88 | 89 | ## Changelog 90 | - 20 July 2022: 91 | - Fixed an issue where the species/individual replay buffer was never reset. 92 | - 9 August 2020: 93 | - We use now the current version of rlkit. Alpha parameter can now be set via the experiment_config file. 94 | - 19 June 2020: 95 | - Updated the repository to use the current version of rlkit. However, we cannot set our own alpha parameter for SAC, so it might come with some initial performance decrease. 
96 | - Added a basic Video recorder which retains the best 5 episodes recorded 97 | - Fixed a bug which was introduced when refactoring the code 98 | -------------------------------------------------------------------------------- /RL/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ksluck/Coadaptation/c45d1d682262ef04cb797122d6103050b8d8b826/RL/__init__.py -------------------------------------------------------------------------------- /RL/evoreplay.py: -------------------------------------------------------------------------------- 1 | from rlkit.data_management.replay_buffer import ReplayBuffer 2 | from rlkit.data_management.env_replay_buffer import EnvReplayBuffer 3 | import numpy as np 4 | 5 | class EvoReplayLocalGlobalStart(ReplayBuffer): 6 | def __init__(self, env, max_replay_buffer_size_species, max_replay_buffer_size_population): 7 | self._species_buffer = EnvReplayBuffer(env=env, max_replay_buffer_size=max_replay_buffer_size_species) 8 | self._population_buffer = EnvReplayBuffer(env=env, max_replay_buffer_size=max_replay_buffer_size_population) 9 | self._init_state_buffer = EnvReplayBuffer(env=env, max_replay_buffer_size=max_replay_buffer_size_population) 10 | self._env = env 11 | self._max_replay_buffer_size_species = max_replay_buffer_size_species 12 | self._mode = "species" 13 | self._ep_counter = 0 14 | self._expect_init_state = True 15 | print("Use EvoReplayLocalGlobalStart replay buffer") 16 | 17 | def add_sample(self, observation, action, reward, next_observation, 18 | terminal, **kwargs): 19 | """ 20 | Add a transition tuple. 21 | """ 22 | if True or self._mode == "species": # This is deactivated now 23 | self._species_buffer.add_sample(observation=observation, action=action, reward=reward, next_observation=next_observation, 24 | terminal=terminal, env_info={}, **kwargs) 25 | if self._expect_init_state: 26 | self._init_state_buffer.add_sample(observation=observation, action=action, reward=reward, next_observation=next_observation, 27 | terminal=terminal, env_info={}, **kwargs) 28 | self._init_state_buffer.terminate_episode() 29 | self._expect_init_state = False 30 | 31 | if self._ep_counter >= 0: 32 | self._population_buffer.add_sample(observation=observation, action=action, reward=reward, next_observation=next_observation, 33 | terminal=terminal, env_info={}, **kwargs) 34 | 35 | def terminate_episode(self): 36 | """ 37 | Let the replay buffer know that the episode has terminated in case some 38 | special book-keeping has to happen. 39 | :return: 40 | """ 41 | if self._mode == "species": 42 | self._species_buffer.terminate_episode() 43 | self._population_buffer.terminate_episode() 44 | self._ep_counter += 1 45 | self._expect_init_state = True 46 | 47 | def num_steps_can_sample(self, **kwargs): 48 | """ 49 | :return: # of unique items that can be sampled. 50 | """ 51 | if self._mode == "species": 52 | return self._species_buffer.num_steps_can_sample(**kwargs) 53 | elif self._mode == "population": 54 | return self._population_buffer.num_steps_can_sample(**kwargs) 55 | else: 56 | pass 57 | 58 | def random_batch(self, batch_size): 59 | """ 60 | Return a batch of size `batch_size`. 
61 | :param batch_size: 62 | :return: 63 | """ 64 | if self._mode == "species": 65 | # return self._species_buffer.random_batch(batch_size) 66 | species_batch_size = int(np.floor(batch_size * 0.9)) 67 | pop_batch_size = int(np.ceil(batch_size * 0.1)) 68 | pop = self._population_buffer.random_batch(pop_batch_size) 69 | spec = self._species_buffer.random_batch(species_batch_size) 70 | for key, item in pop.items(): 71 | pop[key] = np.concatenate([pop[key], spec[key]], axis=0) 72 | return pop 73 | elif self._mode == "population": 74 | return self._population_buffer.random_batch(batch_size) 75 | elif self._mode == "start": 76 | return self._init_state_buffer.random_batch(batch_size) 77 | else: 78 | pass 79 | 80 | def set_mode(self, mode): 81 | if mode == "species": 82 | self._mode = mode 83 | elif mode == "population": 84 | self._mode = mode 85 | elif mode == "start": 86 | self._mode = mode 87 | else: 88 | print("No known mode :(") 89 | 90 | def reset_species_buffer(self): 91 | self._species_buffer = EnvReplayBuffer(env = self._env, max_replay_buffer_size=self._max_replay_buffer_size_species) 92 | self._ep_counter = 0 93 | -------------------------------------------------------------------------------- /RL/rl_algorithm.py: -------------------------------------------------------------------------------- 1 | import rlkit.torch.pytorch_util as ptu 2 | 3 | class RL_algorithm(object): 4 | 5 | def __init__(self, config, env, replay, networks): 6 | self._config = config 7 | self.file_str = config['data_folder_experiment'] 8 | 9 | self._env = env 10 | self._networks = networks 11 | self._replay = replay 12 | 13 | if 'use_only_global_networks' in config.keys(): 14 | self._use_only_global_networks = config['use_only_global_networks'] 15 | else: 16 | self._use_only_global_networks = False 17 | 18 | def episode_init(self): 19 | pass 20 | -------------------------------------------------------------------------------- /RL/soft_actor.py: -------------------------------------------------------------------------------- 1 | from rlkit.torch.sac.policies import TanhGaussianPolicy 2 | # from rlkit.torch.sac.sac import SoftActorCritic 3 | from rlkit.torch.networks import FlattenMlp 4 | import numpy as np 5 | from .rl_algorithm import RL_algorithm 6 | from rlkit.torch.sac.sac import SACTrainer as SoftActorCritic_rlkit 7 | import rlkit.torch.pytorch_util as ptu 8 | import torch 9 | import utils 10 | 11 | # networks = {individual:, population:} 12 | class SoftActorCritic(RL_algorithm): 13 | 14 | def __init__(self, config, env, replay, networks): 15 | """ Bascally a wrapper class for SAC from rlkit. 16 | 17 | Args: 18 | config: Configuration dictonary 19 | env: Environment 20 | replay: Replay buffer 21 | networks: dict containing two sub-dicts, 'individual' and 'population' 22 | which contain the networks. 
23 | 24 | """ 25 | super().__init__(config, env, replay, networks) 26 | 27 | self._variant_pop = config['rl_algorithm_config']['algo_params_pop'] 28 | self._variant_spec = config['rl_algorithm_config']['algo_params'] 29 | 30 | self._ind_qf1 = networks['individual']['qf1'] 31 | self._ind_qf2 = networks['individual']['qf2'] 32 | self._ind_qf1_target = networks['individual']['qf1_target'] 33 | self._ind_qf2_target = networks['individual']['qf2_target'] 34 | self._ind_policy = networks['individual']['policy'] 35 | 36 | self._pop_qf1 = networks['population']['qf1'] 37 | self._pop_qf2 = networks['population']['qf2'] 38 | self._pop_qf1_target = networks['population']['qf1_target'] 39 | self._pop_qf2_target = networks['population']['qf2_target'] 40 | self._pop_policy = networks['population']['policy'] 41 | 42 | self._batch_size = config['rl_algorithm_config']['batch_size'] 43 | self._nmbr_indiv_updates = config['rl_algorithm_config']['indiv_updates'] 44 | self._nmbr_pop_updates = config['rl_algorithm_config']['pop_updates'] 45 | 46 | self._algorithm_ind = SoftActorCritic_rlkit( 47 | env=self._env, 48 | policy=self._ind_policy, 49 | qf1=self._ind_qf1, 50 | qf2=self._ind_qf2, 51 | target_qf1=self._ind_qf1_target, 52 | target_qf2=self._ind_qf2_target, 53 | use_automatic_entropy_tuning = False, 54 | **self._variant_spec 55 | ) 56 | 57 | self._algorithm_pop = SoftActorCritic_rlkit( 58 | env=self._env, 59 | policy=self._pop_policy, 60 | qf1=self._pop_qf1, 61 | qf2=self._pop_qf2, 62 | target_qf1=self._pop_qf1_target, 63 | target_qf2=self._pop_qf2_target, 64 | use_automatic_entropy_tuning = False, 65 | **self._variant_pop 66 | ) 67 | 68 | # self._algorithm_ind.to(ptu.device) 69 | # self._algorithm_pop.to(ptu.device) 70 | 71 | def episode_init(self): 72 | """ Initializations to be done before the first episode. 73 | 74 | In this case basically creates a fresh instance of SAC for the 75 | individual networks and copies the values of the target network. 76 | """ 77 | self._algorithm_ind = SoftActorCritic_rlkit( 78 | env=self._env, 79 | policy=self._ind_policy, 80 | qf1=self._ind_qf1, 81 | qf2=self._ind_qf2, 82 | target_qf1=self._ind_qf1_target, 83 | target_qf2=self._ind_qf2_target, 84 | use_automatic_entropy_tuning = False, 85 | # alt_alpha = self._alt_alpha, 86 | **self._variant_spec 87 | ) 88 | if self._config['rl_algorithm_config']['copy_from_gobal']: 89 | utils.copy_pop_to_ind(networks_pop=self._networks['population'], networks_ind=self._networks['individual']) 90 | # We have only to do this becasue the version of rlkit which we use 91 | # creates internally a target network 92 | # vf_dict = self._algorithm_pop.target_vf.state_dict() 93 | # self._algorithm_ind.target_vf.load_state_dict(vf_dict) 94 | # self._algorithm_ind.target_vf.eval() 95 | # self._algorithm_ind.to(ptu.device) 96 | 97 | def single_train_step(self, train_ind=True, train_pop=False): 98 | """ A single trianing step. 99 | 100 | Args: 101 | train_ind: Boolean. If true the individual networks will be trained. 102 | train_pop: Boolean. If true the population networks will be trained. 
103 | """ 104 | if train_ind: 105 | # Get only samples from the species buffer 106 | self._replay.set_mode('species') 107 | # self._algorithm_ind.num_updates_per_train_call = self._variant_spec['num_updates_per_epoch'] 108 | # self._algorithm_ind._try_to_train() 109 | for _ in range(self._nmbr_indiv_updates): 110 | batch = self._replay.random_batch(self._batch_size) 111 | self._algorithm_ind.train(batch) 112 | 113 | if train_pop: 114 | # Get only samples from the population buffer 115 | self._replay.set_mode('population') 116 | # self._algorithm_pop.num_updates_per_train_call = self._variant_pop['num_updates_per_epoch'] 117 | # self._algorithm_pop._try_to_train() 118 | for _ in range(self._nmbr_pop_updates): 119 | batch = self._replay.random_batch(self._batch_size) 120 | self._algorithm_pop.train(batch) 121 | 122 | @staticmethod 123 | def create_networks(env, config): 124 | """ Creates all networks necessary for SAC. 125 | 126 | These networks have to be created before instantiating this class and 127 | used in the constructor. 128 | 129 | Args: 130 | config: A configuration dictonary containing population and 131 | individual networks 132 | 133 | Returns: 134 | A dictonary which contains the networks. 135 | """ 136 | network_dict = { 137 | 'individual' : SoftActorCritic._create_networks(env=env, config=config), 138 | 'population' : SoftActorCritic._create_networks(env=env, config=config), 139 | } 140 | return network_dict 141 | 142 | @staticmethod 143 | def _create_networks(env, config): 144 | """ Creates all networks necessary for SAC. 145 | 146 | These networks have to be created before instantiating this class and 147 | used in the constructor. 148 | 149 | TODO: Maybe this should be reworked one day... 150 | 151 | Args: 152 | config: A configuration dictonary. 153 | 154 | Returns: 155 | A dictonary which contains the networks. 156 | """ 157 | obs_dim = int(np.prod(env.observation_space.shape)) 158 | action_dim = int(np.prod(env.action_space.shape)) 159 | net_size = config['rl_algorithm_config']['net_size'] 160 | hidden_sizes = [net_size] * config['rl_algorithm_config']['network_depth'] 161 | # hidden_sizes = [net_size, net_size, net_size] 162 | qf1 = FlattenMlp( 163 | hidden_sizes=hidden_sizes, 164 | input_size=obs_dim + action_dim, 165 | output_size=1, 166 | ).to(device=ptu.device) 167 | qf2 = FlattenMlp( 168 | hidden_sizes=hidden_sizes, 169 | input_size=obs_dim + action_dim, 170 | output_size=1, 171 | ).to(device=ptu.device) 172 | qf1_target = FlattenMlp( 173 | hidden_sizes=hidden_sizes, 174 | input_size=obs_dim + action_dim, 175 | output_size=1, 176 | ).to(device=ptu.device) 177 | qf2_target = FlattenMlp( 178 | hidden_sizes=hidden_sizes, 179 | input_size=obs_dim + action_dim, 180 | output_size=1, 181 | ).to(device=ptu.device) 182 | policy = TanhGaussianPolicy( 183 | hidden_sizes=hidden_sizes, 184 | obs_dim=obs_dim, 185 | action_dim=action_dim, 186 | ).to(device=ptu.device) 187 | 188 | clip_value = 1.0 189 | for p in qf1.parameters(): 190 | p.register_hook(lambda grad: torch.clamp(grad, -clip_value, clip_value)) 191 | for p in qf2.parameters(): 192 | p.register_hook(lambda grad: torch.clamp(grad, -clip_value, clip_value)) 193 | for p in policy.parameters(): 194 | p.register_hook(lambda grad: torch.clamp(grad, -clip_value, clip_value)) 195 | 196 | return {'qf1' : qf1, 'qf2' : qf2, 'qf1_target' : qf1_target, 'qf2_target' : qf2_target, 'policy' : policy} 197 | 198 | @staticmethod 199 | def get_q_network(networks): 200 | """ Returns the q network from a dict of networks. 
201 | 202 | This method extracts the q-network from the dictonary of networks 203 | created by the function create_networks. 204 | 205 | Args: 206 | networks: Dict containing the networks. 207 | 208 | Returns: 209 | The q-network as torch object. 210 | """ 211 | return networks['qf1'] 212 | 213 | @staticmethod 214 | def get_policy_network(networks): 215 | """ Returns the policy network from a dict of networks. 216 | 217 | This method extracts the policy network from the dictonary of networks 218 | created by the function create_networks. 219 | 220 | Args: 221 | networks: Dict containing the networks. 222 | 223 | Returns: 224 | The policy network as torch object. 225 | """ 226 | return networks['policy'] 227 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ksluck/Coadaptation/c45d1d682262ef04cb797122d6103050b8d8b826/__init__.py -------------------------------------------------------------------------------- /coadapt.py: -------------------------------------------------------------------------------- 1 | from RL.soft_actor import SoftActorCritic 2 | from DO.pso_batch import PSO_batch 3 | from DO.pso_sim import PSO_simulation 4 | from Environments import evoenvs as evoenvs 5 | import utils 6 | import time 7 | from RL.evoreplay import EvoReplayLocalGlobalStart 8 | import numpy as np 9 | import os 10 | import csv 11 | import torch 12 | 13 | def select_design_opt_alg(alg_name): 14 | """ Selects the design optimization method. 15 | 16 | Args: 17 | alg_name: String which states the design optimization method. Can be 18 | `pso_batch` or `pso_sim`. 19 | 20 | Returns: 21 | The class of a design optimization method. 22 | 23 | Raises: 24 | ValueError: If the string alg_name is unknown. 25 | """ 26 | if alg_name == "pso_batch": 27 | return PSO_batch 28 | elif alg_name == "pso_sim": 29 | return PSO_simulation 30 | else: 31 | raise ValueError("Design Optimization method not found.") 32 | 33 | def select_environment(env_name): 34 | """ Selects an environment. 35 | 36 | Args: 37 | env_name: Name (string) of the environment on which the experiment is to 38 | be executed. Can be `HalfCheetah`. 39 | 40 | Returns: 41 | The class of an environment 42 | 43 | Raises: 44 | ValueError: If the string env_name is unknown. 45 | 46 | """ 47 | if env_name == 'HalfCheetah': 48 | return evoenvs.HalfCheetahEnv 49 | else: 50 | raise ValueError("Environment class not found.") 51 | 52 | def select_rl_alg(rl_name): 53 | """ Selectes the reinforcement learning method. 54 | 55 | Args: 56 | rl_name: Name (string) of the rl method. 57 | 58 | Returns: 59 | The class of a reinforcement learning method. 60 | 61 | Raises: 62 | ValueError: If the string rl_name is unknown. 63 | """ 64 | if rl_name == 'SoftActorCritic': 65 | return SoftActorCritic 66 | else: 67 | raise ValueError('RL method not fund.') 68 | 69 | class Coadaptation(object): 70 | """ Basic Co-Adaptaton class. 71 | 72 | """ 73 | 74 | def __init__(self, config): 75 | """ 76 | Args: 77 | config: A config dictonary. 
78 | 79 | """ 80 | 81 | self._config = config 82 | utils.move_to_cuda(self._config) 83 | 84 | # TODO This should not depend on rl_algorithm_config in the future 85 | self._episode_length = self._config['steps_per_episodes'] 86 | self._reward_scale = 1.0 #self._config['rl_algorithm_config']['algo_params']['reward_scale'] 87 | 88 | self._env_class = select_environment(self._config['env']['env_name']) 89 | self._env = evoenvs.HalfCheetahEnv(config=self._config) 90 | 91 | self._replay = EvoReplayLocalGlobalStart(self._env, 92 | max_replay_buffer_size_species=int(1e6), 93 | max_replay_buffer_size_population=int(1e7)) 94 | 95 | self._rl_alg_class = select_rl_alg(self._config['rl_method']) 96 | 97 | self._networks = self._rl_alg_class.create_networks(env=self._env, config=config) 98 | 99 | self._rl_alg = self._rl_alg_class(config=self._config, env=self._env , replay=self._replay, networks=self._networks) 100 | 101 | self._do_alg_class = select_design_opt_alg(self._config['design_optim_method']) 102 | self._do_alg = self._do_alg_class(config=self._config, replay=self._replay, env=self._env) 103 | 104 | # if self._config['use_cpu_for_rollout']: 105 | # utils.move_to_cpu() 106 | # else: 107 | # utils.move_to_cuda(self._config) 108 | # # TODO this is a temp fix - should be cleaned up, not so hppy with it atm 109 | # self._policy_cpu = self._rl_alg_class.get_policy_network(SoftActorCritic.create_networks(env=self._env, config=config)['individual']) 110 | utils.move_to_cuda(self._config) 111 | 112 | self._last_single_iteration_time = 0 113 | self._design_counter = 0 114 | self._episode_counter = 0 115 | self._data_design_type = 'Initial' 116 | 117 | def initialize_episode(self): 118 | """ Initializations required before the first episode. 119 | 120 | Should be called before the first episode of a new design is 121 | executed. Resets variables such as _data_rewards for logging purposes 122 | etc. 123 | 124 | """ 125 | # self._rl_alg.initialize_episode(init_networks = True, copy_from_gobal = True) 126 | self._rl_alg.episode_init() 127 | self._replay.reset_species_buffer() 128 | 129 | self._data_rewards = [] 130 | self._episode_counter = 0 131 | 132 | 133 | def single_iteration(self): 134 | """ A single iteration. 135 | 136 | Makes all necessary function calls for a single iterations such as: 137 | - Collecting training data 138 | - Executing a training step 139 | - Evaluate the current policy 140 | - Log data 141 | 142 | """ 143 | print("Time for one iteration: {}".format(time.time() - self._last_single_iteration_time)) 144 | self._last_single_iteration_time = time.time() 145 | self._replay.set_mode("species") 146 | self.collect_training_experience() 147 | # TODO Change here to train global only after five designs 148 | train_pop = self._design_counter > 3 149 | if self._episode_counter >= self._config['initial_episodes']: 150 | self._rl_alg.single_train_step(train_ind=True, train_pop=train_pop) 151 | self._episode_counter += 1 152 | self.execute_policy() 153 | self.save_logged_data() 154 | self.save_networks() 155 | 156 | def collect_training_experience(self): 157 | """ Collect training data. 158 | 159 | This function executes a single episode in the environment using the 160 | exploration strategy/mechanism and the policy. 161 | The data, i.e. state-action-reward-nextState, is stored in the replay 162 | buffer. 
163 | 164 | """ 165 | state = self._env.reset() 166 | nmbr_of_steps = 0 167 | done = False 168 | 169 | if self._episode_counter < self._config['initial_episodes']: 170 | policy_gpu_ind = self._rl_alg_class.get_policy_network(self._networks['population']) 171 | else: 172 | policy_gpu_ind = self._rl_alg_class.get_policy_network(self._networks['individual']) 173 | # self._policy_cpu = utils.copy_network(network_to=self._policy_cpu, network_from=policy_gpu_ind, config=self._config, force_cpu=self._config['use_cpu_for_rollout']) 174 | self._policy_cpu = policy_gpu_ind 175 | 176 | if self._config['use_cpu_for_rollout']: 177 | utils.move_to_cpu() 178 | else: 179 | utils.move_to_cuda(self._config) 180 | 181 | while not(done) and nmbr_of_steps <= self._episode_length: 182 | nmbr_of_steps += 1 183 | action, _ = self._policy_cpu.get_action(state) 184 | new_state, reward, done, info = self._env.step(action) 185 | # TODO this has to be fixed _variant_spec 186 | reward = reward * self._reward_scale 187 | terminal = np.array([done]) 188 | reward = np.array([reward]) 189 | self._replay.add_sample(observation=state, action=action, reward=reward, next_observation=new_state, 190 | terminal=terminal) 191 | state = new_state 192 | self._replay.terminate_episode() 193 | utils.move_to_cuda(self._config) 194 | 195 | def execute_policy(self): 196 | """ Evaluates the current deterministic policy. 197 | 198 | Evaluates the current policy in the environment by unrolling a single 199 | episode in the environment. 200 | The achieved cumulative reward is logged. 201 | 202 | """ 203 | state = self._env.reset() 204 | done = False 205 | reward_ep = 0.0 206 | reward_original = 0.0 207 | action_cost = 0.0 208 | nmbr_of_steps = 0 209 | 210 | if self._episode_counter < self._config['initial_episodes']: 211 | policy_gpu_ind = self._rl_alg_class.get_policy_network(self._networks['population']) 212 | else: 213 | policy_gpu_ind = self._rl_alg_class.get_policy_network(self._networks['individual']) 214 | # self._policy_cpu = utils.copy_network(network_to=self._policy_cpu, network_from=policy_gpu_ind, config=self._config, force_cpu=self._config['use_cpu_for_rollout']) 215 | self._policy_cpu = policy_gpu_ind 216 | 217 | if self._config['use_cpu_for_rollout']: 218 | utils.move_to_cpu() 219 | else: 220 | utils.move_to_cuda(self._config) 221 | 222 | while not(done) and nmbr_of_steps <= self._episode_length: 223 | nmbr_of_steps += 1 224 | action, _ = self._policy_cpu.get_action(state, deterministic=True) 225 | new_state, reward, done, info = self._env.step(action) 226 | action_cost += info['orig_action_cost'] 227 | reward_ep += float(reward) 228 | reward_original += float(info['orig_reward']) 229 | state = new_state 230 | utils.move_to_cuda(self._config) 231 | # Do something here to log the results 232 | self._data_rewards.append(reward_ep) 233 | 234 | def save_networks(self): 235 | """ Saves the networks on the disk. 
236 | """ 237 | if not self._config['save_networks']: 238 | return 239 | 240 | checkpoints_pop = {} 241 | for key, net in self._networks['population'].items(): 242 | checkpoints_pop[key] = net.state_dict() 243 | 244 | checkpoints_ind = {} 245 | for key, net in self._networks['individual'].items(): 246 | checkpoints_ind[key] = net.state_dict() 247 | 248 | checkpoint = { 249 | 'population' : checkpoints_pop, 250 | 'individual' : checkpoints_ind, 251 | } 252 | file_path = os.path.join(self._config['data_folder_experiment'], 'checkpoints') 253 | if not os.path.exists(file_path): 254 | os.makedirs(file_path) 255 | torch.save(checkpoint, os.path.join(file_path, 'checkpoint_design_{}.chk'.format(self._design_counter))) 256 | 257 | def load_networks(self, path): 258 | """ Loads netwokrs from the disk. 259 | """ 260 | model_data = torch.load(path) #, map_location=ptu.device) 261 | 262 | model_data_pop = model_data['population'] 263 | for key, net in self._networks['population'].items(): 264 | params = model_data_pop[key] 265 | net.load_state_dict(params) 266 | 267 | model_data_ind = model_data['individual'] 268 | for key, net in self._networks['individual'].items(): 269 | params = model_data_ind[key] 270 | net.load_state_dict(params) 271 | 272 | def save_logged_data(self): 273 | """ Saves the logged data to the disk as csv files. 274 | 275 | This function creates a log-file in csv format on the disk. For each 276 | design an individual log-file is creates in the experient-directory. 277 | The first row states if the design was one of the initial designs 278 | (as given by the environment), a random design or an optimized design. 279 | The second row gives the design parameters (eta). The third row 280 | contains all subsequent cumulative rewards achieved by the policy 281 | throughout the reinforcement learning process on the current design. 282 | """ 283 | file_path = self._config['data_folder_experiment'] 284 | current_design = self._env.get_current_design() 285 | 286 | with open( 287 | os.path.join(file_path, 288 | 'data_design_{}.csv'.format(self._design_counter) 289 | ), 'w') as fd: 290 | cwriter = csv.writer(fd) 291 | cwriter.writerow(['Design Type:', self._data_design_type]) 292 | cwriter.writerow(current_design) 293 | cwriter.writerow(self._data_rewards) 294 | 295 | def run(self): 296 | """ Runs the Fast Evolution through Actor-Critic RL algorithm. 297 | 298 | First the initial design loop is executed in which the rl-algorithm 299 | is exeuted on the initial designs. Then the design-optimization 300 | process starts. 301 | It is possible to have different numbers of iterations for initial 302 | designs and the design optimization process. 303 | """ 304 | iterations_init = self._config['iterations_init'] 305 | iterations = self._config['iterations'] 306 | design_cycles = self._config['design_cycles'] 307 | exploration_strategy = self._config['exploration_strategy'] 308 | 309 | self._intial_design_loop(iterations_init) 310 | self._training_loop(iterations, design_cycles, exploration_strategy) 311 | 312 | def _training_loop(self, iterations, design_cycles, exploration_strategy): 313 | """ The trianing process which optimizes designs and policies. 314 | 315 | The function executes the reinforcement learning loop and the design 316 | optimization process. 317 | 318 | Args: 319 | iterations: An integer stating the number of iterations/episodes 320 | to be used per design during the reinforcement learning loop. 321 | design_cycles: Integer stating how many designs to evaluate. 
322 | exploration_strategy: String which describes which 323 | design exploration strategy to use. Is not used at the moment, 324 | i.e. only the (uniform) random exploration strategy is used. 325 | 326 | """ 327 | self.initialize_episode() 328 | # TODO fix the following 329 | initial_state = self._env._env.reset() 330 | 331 | self._data_design_type = 'Optimized' 332 | 333 | optimized_params = self._env.get_random_design() 334 | q_network = self._rl_alg_class.get_q_network(self._networks['population']) 335 | policy_network = self._rl_alg_class.get_policy_network(self._networks['population']) 336 | optimized_params = self._do_alg.optimize_design(design=optimized_params, q_network=q_network, policy_network=policy_network) 337 | optimized_params = list(optimized_params) 338 | 339 | for i in range(design_cycles): 340 | self._design_counter += 1 341 | self._env.set_new_design(optimized_params) 342 | 343 | # Reinforcement Learning 344 | for _ in range(iterations): 345 | self.single_iteration() 346 | 347 | # Design Optimization 348 | if i % 2 == 1: 349 | self._data_design_type = 'Optimized' 350 | q_network = self._rl_alg_class.get_q_network(self._networks['population']) 351 | policy_network = self._rl_alg_class.get_policy_network(self._networks['population']) 352 | optimized_params = self._do_alg.optimize_design(design=optimized_params, q_network=q_network, policy_network=policy_network) 353 | optimized_params = list(optimized_params) 354 | else: 355 | self._data_design_type = 'Random' 356 | optimized_params = self._env.get_random_design() 357 | optimized_params = list(optimized_params) 358 | self.initialize_episode() 359 | 360 | def _intial_design_loop(self, iterations): 361 | """ The initial training loop for initial designs. 362 | 363 | The initial training loop in which no designs are optimized but only 364 | initial designs, provided by the environment, are used. 365 | 366 | Args: 367 | iterations: Integer stating how many training iterations/episodes 368 | to use per design. 
369 | 370 | """ 371 | self._data_design_type = 'Initial' 372 | for params in self._env.init_sim_params: 373 | self._design_counter += 1 374 | self._env.set_new_design(params) 375 | self.initialize_episode() 376 | 377 | # Reinforcement Learning 378 | for _ in range(iterations): 379 | self.single_iteration() 380 | -------------------------------------------------------------------------------- /data_notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Import Libraries and define functions\n", 10 | "import csv\n", 11 | "import numpy as np\n", 12 | "from matplotlib import pyplot as plt\n", 13 | "plt.rcParams['figure.figsize'] = [15, 5]\n", 14 | "plt.rcParams.update({'font.size': 22})\n", 15 | "\n", 16 | "def set_style():\n", 17 | " plt.minorticks_on()\n", 18 | " plt.grid(b=True, which='minor', color='white', linestyle='-', alpha=0.4)\n", 19 | " plt.style.use('ggplot')\n", 20 | " plt.rcParams.update({'font.size': 22})\n", 21 | " legend = plt.gca().legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode=\"expand\", borderaxespad=0., frameon = 1)\n", 22 | " frame = legend.get_frame()\n", 23 | " frame.set_facecolor('white')\n", 24 | " frame.set_edgecolor('white')\n", 25 | "\n", 26 | "\n", 27 | "def save_fig(file_name):\n", 28 | " plt.savefig(file_name, dpi=300, facecolor='w', edgecolor='w',\n", 29 | " orientation='portrait', papertype=None, format=None,\n", 30 | " transparent=False, bbox_inches='tight', pad_inches=0.02,\n", 31 | " frameon=None, metadata=None)\n", 32 | "\n", 33 | "\n", 34 | "def read_file(path):\n", 35 | " with open(path, 'r') as fd:\n", 36 | " cwriter = csv.reader(fd)\n", 37 | " data = []\n", 38 | " skipped_first_row = False\n", 39 | " for row in cwriter:\n", 40 | " if not skipped_first_row:\n", 41 | " skipped_first_row = True\n", 42 | " continue\n", 43 | " row_tmp = row\n", 44 | " for idx in range(len(row_tmp)):\n", 45 | " row_tmp[idx] = float(row_tmp[idx])\n", 46 | " data.append(row_tmp)\n", 47 | " config = data[0]\n", 48 | " data = data[1]\n", 49 | " return data, config\n", 50 | "\n", 51 | "\n", 52 | "def read_data_folder(path):\n", 53 | " data_max = []\n", 54 | " data_all = []\n", 55 | " configs = []\n", 56 | " for idx in range(1, DESIGNS+1):\n", 57 | " data, config = read_file(path + '{}.csv'.format(idx))\n", 58 | " for d in data:\n", 59 | " data_all.append(d)\n", 60 | " data_max.append(np.amax(data))\n", 61 | " data = np.array(data)\n", 62 | " data_all.append(data)\n", 63 | " configs.append(config)\n", 64 | " return data_all, data_max, configs\n", 65 | " \n", 66 | "\n", 67 | "def compute_mean_std(paths):\n", 68 | " data_max_ar = []\n", 69 | " data_top_mean_ar = []\n", 70 | " configs_ar = []\n", 71 | " data_all_ar = []\n", 72 | " for p in paths:\n", 73 | " data_all, data_max, configs = read_data_folder(p)\n", 74 | " data_all_ar.append(data_all)\n", 75 | " data_max_ar.append(data_max)\n", 76 | " configs_ar.append(configs)\n", 77 | " data_max_mean = np.mean(data_max_ar, axis=0)\n", 78 | " data_max_std = np.std(data_max_ar, axis=0)\n", 79 | " return data_max_mean, data_max_std\n", 80 | "\n", 81 | "\n", 82 | "def plot_data_means(mean_data, std_data, color, label='', plot_random_std=False):\n", 83 | " x = np.arange(1, len(mean_data) + 1)\n", 84 | " init_data_mean = mean_data[0:INITIAL_DESIGNS+1]\n", 85 | " init_data_std = std_data[0:INITIAL_DESIGNS+1]\n", 86 | " init_x = 
x[0:INITIAL_DESIGNS+1]\n", 87 | " optim_data_mean = mean_data[INITIAL_DESIGNS::2]\n", 88 | " optim_data_std = std_data[INITIAL_DESIGNS::2]\n", 89 | " optim_x = x[INITIAL_DESIGNS::2]\n", 90 | " random_data_mean = mean_data[INITIAL_DESIGNS+1::2]\n", 91 | " random_data_std = std_data[INITIAL_DESIGNS+1::2]\n", 92 | " random_x = x[INITIAL_DESIGNS+1::2]\n", 93 | " plt.plot(init_x, init_data_mean, color=color, alpha=0.3, linewidth=2.0)\n", 94 | " plt.fill_between(init_x, init_data_mean - init_data_std, init_data_mean + init_data_std, facecolor=color, alpha=0.1)\n", 95 | " plt.plot(optim_x, optim_data_mean, color=color, label=label, linewidth=2.0)\n", 96 | " plt.fill_between(optim_x, optim_data_mean - optim_data_std, optim_data_mean + optim_data_std, facecolor=color, alpha=0.2)\n", 97 | " #plt.plot(random_x, random_data_mean, color=color, linestyle='--')\n", 98 | " if plot_random_std:\n", 99 | " plt.fill_between(random_x, random_data_mean - random_data_std, random_data_mean + random_data_std, facecolor=color, alpha=0.2)\n", 100 | " set_style()\n", 101 | "\n", 102 | " \n", 103 | "def plot_data_means_optim_random(mean_data, std_data, color, label=''):\n", 104 | " x = np.arange(1, len(mean_data) + 1)\n", 105 | " init_data_mean = mean_data[0:INITIAL_DESIGNS+1]\n", 106 | " init_data_std = std_data[0:INITIAL_DESIGNS+1]\n", 107 | " init_x = x[0:INITIAL_DESIGNS+1]\n", 108 | " optim_data_mean = mean_data[INITIAL_DESIGNS::2]\n", 109 | " optim_data_std = std_data[INITIAL_DESIGNS::2]\n", 110 | " optim_x = x[INITIAL_DESIGNS::2]\n", 111 | " random_data_mean = mean_data[INITIAL_DESIGNS+1::2]\n", 112 | " random_data_std = std_data[INITIAL_DESIGNS+1::2]\n", 113 | " random_x = x[INITIAL_DESIGNS+1::2]\n", 114 | " plt.plot(init_x, init_data_mean, color=color, alpha=0.3, linewidth=2.0)\n", 115 | " plt.fill_between(init_x, init_data_mean - init_data_std, init_data_mean + init_data_std, facecolor=color, alpha=0.1)\n", 116 | " plt.plot(optim_x, optim_data_mean, color=color, linewidth=2.0, label=label)\n", 117 | " plt.fill_between(optim_x, optim_data_mean - optim_data_std, optim_data_mean + optim_data_std, facecolor=color, alpha=0.2)\n", 118 | " plt.plot(random_x, random_data_mean, color=color, label=label + ' Random Exploration', linestyle='--', linewidth=2.0)\n", 119 | " plt.fill_between(random_x, random_data_mean - random_data_std, random_data_mean + random_data_std, facecolor=color, alpha=0.2)\n", 120 | " set_style()\n", 121 | "\n", 122 | "\n" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 6, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "# Change these parameters as required\n", 132 | "INITIAL_DESIGNS = 5\n", 133 | "DESIGNS = 50\n", 134 | "\n", 135 | "# Proposed method using data-driven design optimization\n", 136 | "# Change folder names\n", 137 | "EXPERIMENT_FOLDERS_1 = [\n", 138 | " 'data_exp_sac_pso_batch/Mon_Oct_21_05:47:55_2019__e37381f2',\n", 139 | " 'data_exp_sac_pso_batch/Mon_Oct_21_05:48:30_2019__0041f8c5',\n", 140 | " 'data_exp_sac_pso_batch/Mon_Oct_21_05:49:00_2019__5e2e3460',\n", 141 | " 'data_exp_sac_pso_batch/Sun_Oct_20_23:36:49_2019__5db36c30',\n", 142 | " 'data_exp_sac_pso_batch/Sun_Oct_20_23:37:06_2019__30302df7',\n", 143 | "]\n", 144 | "\n", 145 | "# Baseline using simulations for the evaluation of design candidates\n", 146 | "# Change folder names\n", 147 | "EXPERIMENTS_FOLDERS_2 = [\n", 148 | " 'data_exp_sac_pso_sim/Wed_Oct_23_05:49:05_2019__32509883',\n", 149 | " 'data_exp_sac_pso_sim/Wed_Oct_23_05:49:22_2019__6878474d',\n", 150 | " 
'data_exp_sac_pso_sim/Wed_Oct_23_05:51:21_2019__36c0fafc',\n", 151 | " 'data_exp_sac_pso_sim/Wed_Oct_23_05:51:33_2019__b21f861b',\n", 152 | " 'data_exp_sac_pso_sim/Wed_Oct_23_05:51:45_2019__cc74a513',\n", 153 | "]\n" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 7, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | "text": [ 165 | "Using matplotlib backend: TkAgg\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "%matplotlib" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 8, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "exp_files = ['{}/data_design_'.format(folder) for folder in EXPERIMENT_FOLDERS] # Novelty Search + PSO on Q\n", 180 | "exp_mean, exp_std = compute_mean_std(exp_files)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 22, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "# Plot performance of optimized designs and randomly selected designs\n", 190 | "plot_data_means_optim_random(exp_mean, exp_std, color='red', label='Proposed Method')\n", 191 | "plt.ylabel('Cum. Episodic Reward')\n", 192 | "plt.xlabel('Designs')\n", 193 | "plt.ylim([150,600])\n", 194 | "plt.xlim([1,DESIGNS])\n", 195 | "plt.show()\n", 196 | "#save_fig('plots_HalfCheetah_random_vs_novelty_search.pdf')" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 23, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "# Compare two experiments/methods against each other\n", 206 | "exp_files = ['{}/data_design_'.format(folder) for folder in EXPERIMENT_FOLDERS_1] # Novelty Search + PSO on Q\n", 207 | "exp_mean, exp_std = compute_mean_std(exp_files)\n", 208 | "plot_data_means(exp_mean, exp_std, color='red', label='Proposed Method')\n", 209 | "\n", 210 | "exp_files = ['{}/data_design_'.format(folder) for folder in EXPERIMENTS_FOLDERS_2] # Novelty Search + PSO on Q\n", 211 | "exp_mean, exp_std = compute_mean_std(exp_files)\n", 212 | "plot_data_means(exp_mean, exp_std, color='blue', label='Using Simulations')\n", 213 | "plt.ylabel('Cum. Episodic Reward')\n", 214 | "plt.xlabel('Designs')\n", 215 | "plt.ylim([150,600])\n", 216 | "plt.xlim([1,DESIGNS])\n", 217 | "plt.show()" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [] 226 | } 227 | ], 228 | "metadata": { 229 | "kernelspec": { 230 | "display_name": "Python 3", 231 | "language": "python", 232 | "name": "python3" 233 | }, 234 | "language_info": { 235 | "codemirror_mode": { 236 | "name": "ipython", 237 | "version": 3 238 | }, 239 | "file_extension": ".py", 240 | "mimetype": "text/x-python", 241 | "name": "python", 242 | "nbconvert_exporter": "python", 243 | "pygments_lexer": "ipython3", 244 | "version": "3.6.8" 245 | } 246 | }, 247 | "nbformat": 4, 248 | "nbformat_minor": 2 249 | } 250 | -------------------------------------------------------------------------------- /experiment_configs.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | sac_pso_batch = { 4 | 'name' : 'Experiment 1: PSO Batch', # Name of the experiment. 
Can be used just for reference later 5 | 'data_folder' : 'data_exp_sac_pso_batch', # Name of the folder in which data generated during the experiments are saved 6 | 'nmbr_random_designs' : 0, # Number of random designs to run after the initial design TODO: Not used atm 7 | 'iterations_init' : 300, # Number of episodes for all initial designs as provided by the environment class 8 | 'iterations_random': 100, # Number of episodes for all random designs TODO: Not used atm 9 | 'iterations' : 100, # Number of episodes to run for all designs after the initial 10 | 'design_cycles' : 55, # Number of design adaptations after the initial designs 11 | 'state_batch_size' : 32, # Size of the batch used during the design optimization process to estimate fitness of design 12 | 'initial_episodes' : 3, # Number of initial episodes for each design before we start the training of the individual networks. Useful if steps per episode is low and we want to fill the replay. 13 | 'use_gpu' : True, # Use True when GPU should be used for training and inference 14 | 'use_cpu_for_rollout': False, # TODO: Not used 15 | 'cuda_device': 0, # Which cuda device to use. Only relevant if you have more than one GPU 16 | 'exploration_strategy': 'random', # Type of design exploration to use - we do usually one greedy design optimization and one random selection of a design 17 | 'design_optim_method' : 'pso_batch', # Which design optimization method to use 18 | 'steps_per_episodes' : 1000, # Number of steps per episode 19 | 'save_networks' : True, # If True networks are checkpointed and saved for each design 20 | 'rl_method' : 'SoftActorCritic', # Which reinforcement learning method to use. 21 | 'rl_algorithm_config' : dict( # Dictonary which contains the parameters for the RL algorithm 22 | algo_params=dict( # Parameters for the RL learner for the individual networks 23 | # num_epochs=int(1), 24 | # num_steps_per_epoch=1000, 25 | # num_steps_per_eval=1, 26 | # num_updates_per_env_step=10, 27 | # num_updates_per_epoch=1000, 28 | # batch_size=256, 29 | discount=0.99, 30 | reward_scale=1.0, 31 | 32 | soft_target_tau=0.005, 33 | target_update_period=1, 34 | policy_lr=3E-4, 35 | qf_lr=3E-4, 36 | alpha=0.01, 37 | ), 38 | algo_params_pop=dict( # Parameters for the RL learner for the individual networks 39 | # num_epochs=int(1), 40 | # num_steps_per_epoch=1, 41 | # num_steps_per_eval=1, 42 | # num_updates_per_env_step=10, 43 | # num_updates_per_epoch=250, 44 | # batch_size=256, 45 | discount=0.99, 46 | reward_scale=1.0, 47 | 48 | soft_target_tau=0.005, 49 | target_update_period=1, 50 | policy_lr=3E-4, 51 | qf_lr=3E-4, 52 | alpha=0.01, 53 | ), 54 | net_size=200, # Number of neurons in hidden layer 55 | network_depth=3, # Number of hidden layers 56 | copy_from_gobal=True, # Shall we pre-initialize the individual network with the global network? 
57 | indiv_updates=1000, # Number of training updates per episode for individual networks 58 | pop_updates=250, # Number of training updates per episode for population networks 59 | batch_size=256, # Batch size 60 | ), 61 | 'env' : dict( # Parameters and which environment to use 62 | env_name='HalfCheetah', # Name of environment 63 | render=False, # Use True if you want to visualize/render the environment 64 | record_video=False, # Use True if you want to record videos 65 | ), 66 | } 67 | 68 | sac_pso_sim = { 69 | 'name' : 'Experiment 2: PSO using Simulations', 70 | 'data_folder' : 'data_exp_sac_pso_sim', 71 | 'nmbr_random_designs' : 0, 72 | 'iterations_init' : 300, 73 | 'iterations_random': 100, 74 | 'iterations' : 100, 75 | 'design_cycles' : 55, 76 | 'state_batch_size' : 32, 77 | 'initial_episodes' : 3, 78 | 'use_gpu' : True, 79 | 'use_cpu_for_rollout': False, 80 | 'cuda_device': 0, 81 | 'exploration_strategy': 'random', 82 | 'design_optim_method' : 'pso_sim', 83 | 'save_networks' : True, 84 | 'rl_method' : 'SoftActorCritic', 85 | 'rl_algorithm_config' : dict( 86 | algo_params=dict( 87 | # num_epochs=int(1), 88 | # num_steps_per_epoch=1000, 89 | # num_steps_per_eval=1, 90 | # num_updates_per_env_step=10, 91 | # num_updates_per_epoch=1000, 92 | # batch_size=256, 93 | discount=0.99, 94 | reward_scale=1.0, 95 | 96 | soft_target_tau=0.005, 97 | target_update_period=1, 98 | policy_lr=3E-4, 99 | qf_lr=3E-4, 100 | alpha=0.01, 101 | ), 102 | algo_params_pop=dict( 103 | # num_epochs=int(1), 104 | # num_steps_per_epoch=1, 105 | # num_steps_per_eval=1, 106 | # num_updates_per_env_step=10, 107 | # num_updates_per_epoch=250, 108 | # batch_size=256, 109 | discount=0.99, 110 | reward_scale=1.0, 111 | 112 | soft_target_tau=0.005, 113 | target_update_period=1, 114 | policy_lr=3E-4, 115 | qf_lr=3E-4, 116 | alpha=0.01, 117 | ), 118 | net_size=200, 119 | network_depth=3, 120 | copy_from_gobal=True, 121 | indiv_updates=1000, 122 | pop_updates=250, 123 | batch_size=256, 124 | ), 125 | 'env' : dict( 126 | env_name='HalfCheetah', 127 | render=True, 128 | record_video=False, 129 | ), 130 | } 131 | 132 | config_dict = { 133 | 'sac_pso_batch' : sac_pso_batch, 134 | 'sac_pso_sim' : sac_pso_sim, 135 | } 136 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os, sys, time 2 | import hashlib 3 | import coadapt 4 | import experiment_configs as cfg 5 | import json 6 | 7 | def main(config): 8 | 9 | # Create foldr in which to save results 10 | folder = config['data_folder'] 11 | #generate random hash string - unique identifier if we start 12 | # multiple experiments at the same time 13 | rand_id = hashlib.md5(os.urandom(128)).hexdigest()[:8] 14 | file_str = './' + folder + '/' + time.ctime().replace(' ', '_') + '__' + rand_id 15 | config['data_folder_experiment'] = file_str 16 | 17 | # Create experiment folder 18 | if not os.path.exists(file_str): 19 | os.makedirs(file_str) 20 | 21 | # Store config 22 | with open(os.path.join(file_str, 'config.json'), 'w') as fd: 23 | fd.write(json.dumps(config,indent=2)) 24 | 25 | co = coadapt.Coadaptation(config) 26 | co.run() 27 | 28 | 29 | 30 | if __name__ == "__main__": 31 | # We assume we call the program only with the name of the config we want to run 32 | # nothing too complex 33 | if len(sys.argv) > 1: 34 | config = cfg.config_dict[sys.argv[1]] 35 | else: 36 | config = cfg.config_dict['base'] 37 | main(config) 38 | 
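As a quick illustration of the configuration workflow in `experiment_configs.py` and `main.py` above, a new experiment can be registered by copying one of the existing config dictionaries, changing a few fields, and adding it to `config_dict`. The sketch below is not part of the repository; the config name `sac_pso_batch_short` and the shortened values are hypothetical.

```python
# Hypothetical addition at the bottom of experiment_configs.py (not part of the
# original repository): derive a shorter run from the existing sac_pso_batch config.
import copy

sac_pso_batch_short = copy.deepcopy(sac_pso_batch)
sac_pso_batch_short['name'] = 'Experiment 3: Short PSO batch run'
sac_pso_batch_short['data_folder'] = 'data_exp_sac_pso_batch_short'
sac_pso_batch_short['iterations_init'] = 30  # fewer episodes per initial design
sac_pso_batch_short['iterations'] = 10       # fewer episodes per adapted design
sac_pso_batch_short['design_cycles'] = 5     # fewer design adaptations overall

# Register it so that `python3 main.py sac_pso_batch_short` can select it by name.
config_dict['sac_pso_batch_short'] = sac_pso_batch_short
```

With this in place, `main.py` looks the new name up in `config_dict`, creates the experiment folder, dumps the config to `config.json`, and starts `Coadaptation(config).run()` exactly as it does for the two built-in configurations.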
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | gtimer 2 | gym 3 | joblib 4 | numpy 5 | pickleshare 6 | ptyprocess 7 | pybullet 8 | pyswarms 9 | PyYAML 10 | torch 11 | torchvision 12 | xmltodict 13 | xmlutils 14 | opencv-python 15 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import rlkit.torch.pytorch_util as ptu 2 | import cv2 3 | import os 4 | from shutil import copyfile, move 5 | import time 6 | import numpy as np 7 | 8 | def move_to_cpu(): 9 | """ Set device to cpu for torch. 10 | """ 11 | ptu.set_gpu_mode(False) 12 | 13 | def move_to_cuda(config): 14 | """ Set device to CUDA and which GPU, or CPU if not set. 15 | 16 | Args: 17 | config: Dict containing the configuration. 18 | """ 19 | if config['use_gpu']: 20 | if 'cuda_device' in config: 21 | cuda_device = config['cuda_device'] 22 | else: 23 | cuda_device = 0 24 | ptu.set_gpu_mode(True, cuda_device) 25 | 26 | def copy_pop_to_ind(networks_pop, networks_ind): 27 | """ Function used to copy params from pop. networks to individual networks. 28 | 29 | The parameters of all networks in network_ind will be set to the parameters 30 | of the networks in networks_ind. 31 | 32 | Args: 33 | networks_pop: Dictonary containing the population networks. 34 | networks_ind: Dictonary containing the individual networks. These 35 | networks will be updated. 36 | """ 37 | for key in networks_pop: 38 | state_dict = networks_pop[key].state_dict() 39 | networks_ind[key].load_state_dict(state_dict) 40 | networks_ind[key].eval() 41 | 42 | def copy_policie_to_cpu(policy_cpu, policy_gpu): 43 | # not used anymore 44 | policy_dict = policy_gpu.state_dict() 45 | for key, val in policy_dict.items(): 46 | policy_dict[key] = val.cpu() 47 | policy_cpu.load_state_dict(policy_dict) 48 | policy_cpu = policy_cpu.cpu() 49 | policy_cpu.eval() 50 | return policy_cpu 51 | 52 | def copy_network(network_to, network_from, config, force_cpu=False): 53 | """ Copies networks and set them to device or cpu. 54 | 55 | Args: 56 | networks_to: Netwoks to which we want to copy (destination). 57 | networks_from: Networks from which we want to copy (source). These 58 | networks will be changed. 59 | force_cpu: Boolean, if True the desitnation nateworks will be placed on 60 | the cpu. If not the current device will be used. 
61 | """ 62 | network_from_dict = network_from.state_dict() 63 | if force_cpu: 64 | for key, val in network_from_dict.items(): 65 | network_from_dict[key] = val.cpu() 66 | else: 67 | move_to_cuda(config) 68 | network_to.load_state_dict(network_from_dict) 69 | if force_cpu: 70 | network_to = network_to.to('cpu') 71 | else: 72 | network_to.to(ptu.device) 73 | network_to.eval() 74 | return network_to 75 | 76 | class BestEpisodesVideoRecorder(object): 77 | def __init__(self, path=None, max_videos=1): 78 | self._vid_path = '/tmp/videos' if path is None else path 79 | 80 | self._folder_counter = 0 81 | self._keep_n_best = max(max_videos, 1) 82 | self._record_evy_n_episodes = 5 83 | 84 | self._frame_width = 200 85 | self._frame_height = 200 86 | self._fps_per_frame = 0 87 | 88 | self.increase_folder_counter() 89 | self._create_vid_stream() 90 | self._time_start = time.time() 91 | 92 | def _episodic_reset(self): 93 | self._current_episode_reward = 0 94 | self._did_at_least_one_step = False 95 | self._step_counter = 1 96 | 97 | def reset_recorder(self): 98 | self._episode_counter = 0 99 | self._episodic_rewards = [-float('inf')] * self._keep_n_best 100 | self._episodic_reset() 101 | 102 | 103 | def increase_folder_counter(self): 104 | self._current_vid_path = os.path.join(self._vid_path, str(self._folder_counter)) 105 | self.reset_recorder() 106 | self._folder_counter += 1 107 | 108 | def step(self, env, state, reward, done): 109 | if self._episode_counter % self._record_evy_n_episodes == 0: 110 | self._current_episode_reward += reward 111 | env.camera_adjust() 112 | frame = env.render_camera_image((self._frame_width, self._frame_height)) 113 | frame = frame * 255 114 | frame = frame.astype(np.uint8) 115 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 116 | self._vid_writer.write(frame) 117 | self._did_at_least_one_step = True 118 | proc_time = (time.time() - self._time_start)*1000 119 | proc_time = 1000/proc_time 120 | self._time_start = time.time() 121 | self._fps_per_frame += proc_time 122 | self._step_counter += 1 123 | 124 | def _do_video_file_rotation(self): 125 | for idx, elem in enumerate(self._episodic_rewards): 126 | if idx > 1: 127 | try: 128 | move(os.path.join(self._current_vid_path, 'video_{}.avi'.format(idx-1)), os.path.join(self._current_vid_path, 'video_{}.avi'.format(idx-2))) 129 | except: 130 | pass 131 | if self._current_episode_reward < elem: 132 | self._episodic_rewards = self._episodic_rewards[1:idx] + [self._current_episode_reward] + self._episodic_rewards[idx:] 133 | copyfile(os.path.join(self._current_vid_path, 'current_video.avi'), os.path.join(self._current_vid_path, 'video_{}.avi'.format(idx-1))) 134 | break 135 | # Will only be true in last iteration and only be hit if last element is to be moved 136 | if idx == len(self._episodic_rewards)-1: 137 | try: 138 | move(os.path.join(self._current_vid_path, 'video_{}.avi'.format(idx)), os.path.join(self._current_vid_path, 'video_{}.avi'.format(idx-1))) 139 | except: 140 | pass 141 | self._episodic_rewards = self._episodic_rewards[1:] + [self._current_episode_reward] 142 | copyfile(os.path.join(self._current_vid_path, 'current_video.avi'), os.path.join(self._current_vid_path, 'video_{}.avi'.format(idx))) 143 | 144 | 145 | def reset(self, env, state, reward, done): 146 | # final processing of data from previous episode 147 | if self._episode_counter % self._record_evy_n_episodes == 0: 148 | env.camera_adjust() 149 | self._vid_writer.release() 150 | if not os.path.exists(self._current_vid_path): 151 | 
os.makedirs(self._current_vid_path) 152 | if self._did_at_least_one_step and min(self._episodic_rewards) < self._current_episode_reward: 153 | self._do_video_file_rotation() 154 | print('Average FPS of last episode: {}'.format(self._fps_per_frame/self._step_counter)) 155 | 156 | self._episode_counter += 1 157 | self._episodic_reset() 158 | # set up everything for this episode if we record 159 | if self._episode_counter % self._record_evy_n_episodes == 0: 160 | self._create_vid_stream() 161 | frame = env.render_camera_image((self._frame_width, self._frame_height)) 162 | frame = frame * 255 163 | frame = frame.astype(np.uint8) 164 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 165 | self._vid_writer.write(frame) 166 | 167 | self._time_start = time.time() 168 | self._fps_per_frame = 0 169 | self._step_counter = 1 170 | 171 | def _create_vid_stream(self): 172 | if not os.path.exists(self._current_vid_path): 173 | os.makedirs(self._current_vid_path) 174 | self._vid_writer = cv2.VideoWriter(os.path.join(self._current_vid_path, 'current_video.avi'), 175 | cv2.VideoWriter_fourcc('M','J','P','G'), 30, (self._frame_width, self._frame_height)) 176 | --------------------------------------------------------------------------------
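To complement the data-logging description in the README and the `save_logged_data` method in `coadapt.py`, the following sketch shows one way to load a single per-design log file. It is not part of the repository; the file path is a placeholder and `load_design_log` is a hypothetical helper.

```python
# Minimal sketch (not part of the repository) for reading one data_design_<n>.csv file
# written by Coadaptation.save_logged_data(). The path below is a placeholder.
import csv

import numpy as np


def load_design_log(path):
    """Return (design_type, design_vector, episode_rewards) for one design."""
    with open(path, 'r') as fd:
        rows = list(csv.reader(fd))
    design_type = rows[0][1]  # 'Initial', 'Optimized' or 'Random'
    design = np.array([float(v) for v in rows[1]])   # design parameters (eta)
    rewards = np.array([float(v) for v in rows[2]])  # cumulative reward per episode
    return design_type, design, rewards


design_type, design, rewards = load_design_log(
    'data_exp_sac_pso_batch/<experiment_folder>/data_design_1.csv')
print(design_type, design, rewards.max())
```

This is roughly what `read_file` in `data_notebook.ipynb` does before aggregating the per-design maxima over all designs and experiment folders for plotting.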