├── .DS_Store
├── LICENSE
├── README.md
├── actor.py
├── agent.py
├── ant_environments
├── .DS_Store
├── ant_four_rooms_2_levels
│ ├── README.md
│ ├── actor.py
│ ├── agent.py
│ ├── critic.py
│ ├── design_agent_and_env.py
│ ├── environment.py
│ ├── experience_buffer.py
│ ├── initialize_HAC.py
│ ├── layer.py
│ ├── models
│ │ ├── HAC.ckpt-99.data-00000-of-00001
│ │ ├── HAC.ckpt-99.index
│ │ ├── HAC.ckpt-99.meta
│ │ └── checkpoint
│ ├── mujoco_files
│ │ ├── ant_four_rooms.xml
│ │ ├── ant_reacher.xml
│ │ ├── common
│ │ │ ├── __init__.py
│ │ │ ├── materials.xml
│ │ │ ├── skybox.xml
│ │ │ └── visual.xml
│ │ ├── muj_gripper
│ │ │ ├── c_base.stl
│ │ │ ├── c_forearm.stl
│ │ │ ├── c_robotiq_85_gripper_joint_3_L.stl
│ │ │ ├── c_robotiq_85_gripper_joint_3_R.stl
│ │ │ ├── c_shoulder.stl
│ │ │ ├── c_upperarm.stl
│ │ │ ├── c_wrist1.stl
│ │ │ ├── c_wrist2.stl
│ │ │ ├── c_wrist3.stl
│ │ │ ├── glass_cup.stl
│ │ │ ├── glass_cup_2.stl
│ │ │ ├── glass_cup_3.stl
│ │ │ ├── inner_finger_coarse.stl
│ │ │ ├── inner_finger_fine.stl
│ │ │ ├── inner_knuckle_coarse.stl
│ │ │ ├── inner_knuckle_fine.stl
│ │ │ ├── new_solo_cup.stl
│ │ │ ├── outer_finger_coarse.stl
│ │ │ ├── outer_finger_fine.stl
│ │ │ ├── outer_knuckle_coarse.stl
│ │ │ ├── outer_knuckle_fine.stl
│ │ │ ├── red_solo_cup.stl
│ │ │ ├── robotiq_85_base_link_coarse.stl
│ │ │ ├── robotiq_85_base_link_fine.stl
│ │ │ ├── smaller_solo_cup.stl
│ │ │ ├── solo_cup.stl
│ │ │ ├── upd_solo_cup.stl
│ │ │ ├── v_base.stl
│ │ │ ├── v_forearm.stl
│ │ │ ├── v_robotiq_85_gripper_joint_3_L.stl
│ │ │ ├── v_robotiq_85_gripper_joint_3_R.stl
│ │ │ ├── v_shoulder.stl
│ │ │ ├── v_upperarm.stl
│ │ │ ├── v_wrist1.stl
│ │ │ ├── v_wrist2.stl
│ │ │ └── v_wrist3.stl
│ │ ├── pendulum.xml
│ │ └── ur5.xml
│ ├── options.py
│ ├── performance_log.p
│ ├── run_HAC.py
│ └── utils.py
├── ant_four_rooms_3_levels
│ ├── README.md
│ ├── actor.py
│ ├── agent.py
│ ├── critic.py
│ ├── design_agent_and_env.py
│ ├── environment.py
│ ├── experience_buffer.py
│ ├── initialize_HAC.py
│ ├── layer.py
│ ├── mujoco_files
│ │ ├── ant_four_rooms.xml
│ │ ├── ant_reacher.xml
│ │ ├── common
│ │ │ ├── __init__.py
│ │ │ ├── materials.xml
│ │ │ ├── skybox.xml
│ │ │ └── visual.xml
│ │ ├── muj_gripper
│ │ │ ├── c_base.stl
│ │ │ ├── c_forearm.stl
│ │ │ ├── c_robotiq_85_gripper_joint_3_L.stl
│ │ │ ├── c_robotiq_85_gripper_joint_3_R.stl
│ │ │ ├── c_shoulder.stl
│ │ │ ├── c_upperarm.stl
│ │ │ ├── c_wrist1.stl
│ │ │ ├── c_wrist2.stl
│ │ │ ├── c_wrist3.stl
│ │ │ ├── glass_cup.stl
│ │ │ ├── glass_cup_2.stl
│ │ │ ├── glass_cup_3.stl
│ │ │ ├── inner_finger_coarse.stl
│ │ │ ├── inner_finger_fine.stl
│ │ │ ├── inner_knuckle_coarse.stl
│ │ │ ├── inner_knuckle_fine.stl
│ │ │ ├── new_solo_cup.stl
│ │ │ ├── outer_finger_coarse.stl
│ │ │ ├── outer_finger_fine.stl
│ │ │ ├── outer_knuckle_coarse.stl
│ │ │ ├── outer_knuckle_fine.stl
│ │ │ ├── red_solo_cup.stl
│ │ │ ├── robotiq_85_base_link_coarse.stl
│ │ │ ├── robotiq_85_base_link_fine.stl
│ │ │ ├── smaller_solo_cup.stl
│ │ │ ├── solo_cup.stl
│ │ │ ├── upd_solo_cup.stl
│ │ │ ├── v_base.stl
│ │ │ ├── v_forearm.stl
│ │ │ ├── v_robotiq_85_gripper_joint_3_L.stl
│ │ │ ├── v_robotiq_85_gripper_joint_3_R.stl
│ │ │ ├── v_shoulder.stl
│ │ │ ├── v_upperarm.stl
│ │ │ ├── v_wrist1.stl
│ │ │ ├── v_wrist2.stl
│ │ │ └── v_wrist3.stl
│ │ ├── pendulum.xml
│ │ └── ur5.xml
│ ├── options.py
│ ├── run_HAC.py
│ └── utils.py
├── ant_reacher_2_levels
│ ├── .DS_Store
│ ├── README.md
│ ├── __pycache__
│ │ ├── actor.cpython-37.pyc
│ │ ├── agent.cpython-37.pyc
│ │ ├── critic.cpython-37.pyc
│ │ ├── design_agent_and_env.cpython-37.pyc
│ │ ├── environment.cpython-37.pyc
│ │ ├── experience_buffer.cpython-37.pyc
│ │ ├── layer.cpython-37.pyc
│ │ ├── options.cpython-37.pyc
│ │ ├── run_HAC.cpython-37.pyc
│ │ └── utils.cpython-37.pyc
│ ├── actor.py
│ ├── agent.py
│ ├── critic.py
│ ├── design_agent_and_env.py
│ ├── environment.py
│ ├── experience_buffer.py
│ ├── initialize_HAC.py
│ ├── layer.py
│ ├── models
│ │ ├── HAC.ckpt-99.data-00000-of-00001
│ │ ├── HAC.ckpt-99.index
│ │ ├── HAC.ckpt-99.meta
│ │ └── checkpoint
│ ├── mujoco_files
│ │ ├── ant_reacher.xml
│ │ ├── common
│ │ │ ├── __init__.py
│ │ │ ├── materials.xml
│ │ │ ├── skybox.xml
│ │ │ └── visual.xml
│ │ ├── muj_gripper
│ │ │ ├── c_base.stl
│ │ │ ├── c_forearm.stl
│ │ │ ├── c_robotiq_85_gripper_joint_3_L.stl
│ │ │ ├── c_robotiq_85_gripper_joint_3_R.stl
│ │ │ ├── c_shoulder.stl
│ │ │ ├── c_upperarm.stl
│ │ │ ├── c_wrist1.stl
│ │ │ ├── c_wrist2.stl
│ │ │ ├── c_wrist3.stl
│ │ │ ├── glass_cup.stl
│ │ │ ├── glass_cup_2.stl
│ │ │ ├── glass_cup_3.stl
│ │ │ ├── inner_finger_coarse.stl
│ │ │ ├── inner_finger_fine.stl
│ │ │ ├── inner_knuckle_coarse.stl
│ │ │ ├── inner_knuckle_fine.stl
│ │ │ ├── new_solo_cup.stl
│ │ │ ├── outer_finger_coarse.stl
│ │ │ ├── outer_finger_fine.stl
│ │ │ ├── outer_knuckle_coarse.stl
│ │ │ ├── outer_knuckle_fine.stl
│ │ │ ├── red_solo_cup.stl
│ │ │ ├── robotiq_85_base_link_coarse.stl
│ │ │ ├── robotiq_85_base_link_fine.stl
│ │ │ ├── smaller_solo_cup.stl
│ │ │ ├── solo_cup.stl
│ │ │ ├── upd_solo_cup.stl
│ │ │ ├── v_base.stl
│ │ │ ├── v_forearm.stl
│ │ │ ├── v_robotiq_85_gripper_joint_3_L.stl
│ │ │ ├── v_robotiq_85_gripper_joint_3_R.stl
│ │ │ ├── v_shoulder.stl
│ │ │ ├── v_upperarm.stl
│ │ │ ├── v_wrist1.stl
│ │ │ ├── v_wrist2.stl
│ │ │ └── v_wrist3.stl
│ │ ├── pendulum.xml
│ │ └── ur5.xml
│ ├── options.py
│ ├── run_HAC.py
│ └── utils.py
└── ant_reacher_3_levels
│ ├── README.md
│ ├── __pycache__
│ │ ├── actor.cpython-37.pyc
│ │ ├── agent.cpython-37.pyc
│ │ ├── critic.cpython-37.pyc
│ │ ├── design_agent_and_env.cpython-37.pyc
│ │ ├── environment.cpython-37.pyc
│ │ ├── experience_buffer.cpython-37.pyc
│ │ ├── layer.cpython-37.pyc
│ │ ├── options.cpython-37.pyc
│ │ ├── run_HAC.cpython-37.pyc
│ │ └── utils.cpython-37.pyc
│ ├── actor.py
│ ├── agent.py
│ ├── critic.py
│ ├── design_agent_and_env.py
│ ├── environment.py
│ ├── experience_buffer.py
│ ├── initialize_HAC.py
│ ├── layer.py
│ ├── models
│ │ ├── HAC.ckpt-99.data-00000-of-00001
│ │ ├── HAC.ckpt-99.index
│ │ ├── HAC.ckpt-99.meta
│ │ └── checkpoint
│ ├── mujoco_files
│ │ ├── ant_reacher.xml
│ │ ├── common
│ │ │ ├── __init__.py
│ │ │ ├── materials.xml
│ │ │ ├── skybox.xml
│ │ │ └── visual.xml
│ │ ├── muj_gripper
│ │ │ ├── c_base.stl
│ │ │ ├── c_forearm.stl
│ │ │ ├── c_robotiq_85_gripper_joint_3_L.stl
│ │ │ ├── c_robotiq_85_gripper_joint_3_R.stl
│ │ │ ├── c_shoulder.stl
│ │ │ ├── c_upperarm.stl
│ │ │ ├── c_wrist1.stl
│ │ │ ├── c_wrist2.stl
│ │ │ ├── c_wrist3.stl
│ │ │ ├── glass_cup.stl
│ │ │ ├── glass_cup_2.stl
│ │ │ ├── glass_cup_3.stl
│ │ │ ├── inner_finger_coarse.stl
│ │ │ ├── inner_finger_fine.stl
│ │ │ ├── inner_knuckle_coarse.stl
│ │ │ ├── inner_knuckle_fine.stl
│ │ │ ├── new_solo_cup.stl
│ │ │ ├── outer_finger_coarse.stl
│ │ │ ├── outer_finger_fine.stl
│ │ │ ├── outer_knuckle_coarse.stl
│ │ │ ├── outer_knuckle_fine.stl
│ │ │ ├── red_solo_cup.stl
│ │ │ ├── robotiq_85_base_link_coarse.stl
│ │ │ ├── robotiq_85_base_link_fine.stl
│ │ │ ├── smaller_solo_cup.stl
│ │ │ ├── solo_cup.stl
│ │ │ ├── upd_solo_cup.stl
│ │ │ ├── v_base.stl
│ │ │ ├── v_forearm.stl
│ │ │ ├── v_robotiq_85_gripper_joint_3_L.stl
│ │ │ ├── v_robotiq_85_gripper_joint_3_R.stl
│ │ │ ├── v_shoulder.stl
│ │ │ ├── v_upperarm.stl
│ │ │ ├── v_wrist1.stl
│ │ │ ├── v_wrist2.stl
│ │ │ └── v_wrist3.stl
│ │ ├── pendulum.xml
│ │ └── ur5.xml
│ ├── options.py
│ ├── run_HAC.py
│ └── utils.py
├── critic.py
├── design_agent_and_env.py
├── environment.py
├── example_designs
├── PENDULUM_LAY_1_design_agent_and_env.py
├── PENDULUM_LAY_2_design_agent_and_env.py
├── PENDULUM_LAY_3_design_agent_and_env.py
├── UR5_LAY_1_design_agent_and_env.py
├── UR5_LAY_2_design_agent_and_env.py
└── UR5_LAY_3_design_agent_and_env.py
├── experience_buffer.py
├── initialize_HAC.py
├── layer.py
├── mujoco_files
├── common
│ ├── __init__.py
│ ├── materials.xml
│ ├── skybox.xml
│ └── visual.xml
├── muj_gripper
│ ├── c_base.stl
│ ├── c_forearm.stl
│ ├── c_robotiq_85_gripper_joint_3_L.stl
│ ├── c_robotiq_85_gripper_joint_3_R.stl
│ ├── c_shoulder.stl
│ ├── c_upperarm.stl
│ ├── c_wrist1.stl
│ ├── c_wrist2.stl
│ ├── c_wrist3.stl
│ ├── glass_cup.stl
│ ├── glass_cup_2.stl
│ ├── glass_cup_3.stl
│ ├── inner_finger_coarse.stl
│ ├── inner_finger_fine.stl
│ ├── inner_knuckle_coarse.stl
│ ├── inner_knuckle_fine.stl
│ ├── new_solo_cup.stl
│ ├── outer_finger_coarse.stl
│ ├── outer_finger_fine.stl
│ ├── outer_knuckle_coarse.stl
│ ├── outer_knuckle_fine.stl
│ ├── red_solo_cup.stl
│ ├── robotiq_85_base_link_coarse.stl
│ ├── robotiq_85_base_link_fine.stl
│ ├── smaller_solo_cup.stl
│ ├── solo_cup.stl
│ ├── upd_solo_cup.stl
│ ├── v_base.stl
│ ├── v_forearm.stl
│ ├── v_robotiq_85_gripper_joint_3_L.stl
│ ├── v_robotiq_85_gripper_joint_3_R.stl
│ ├── v_shoulder.stl
│ ├── v_upperarm.stl
│ ├── v_wrist1.stl
│ ├── v_wrist2.stl
│ └── v_wrist3.stl
├── pendulum.xml
└── ur5.xml
├── options.py
├── run_HAC.py
└── utils.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/.DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 andrew-j-levy
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical Actor-Critic (HAC)
2 | This repository contains the code to implement the *Hierarchical Actor-Critic (HAC)* algorithm. HAC helps agents learn tasks more quickly by enabling them to break problems down into short sequences of actions. For more information on the algorithm, please see our ICLR 2019 [paper](https://openreview.net/pdf?id=ryzECoAcY7) and [blog post](http://bigai.cs.brown.edu/2019/09/03/hac.html).
3 |
4 | To run HAC, execute the command *"python3 initialize_HAC.py --retrain"*. By default, this will train a UR5 agent with a 3-level hierarchy to learn to achieve certain poses. This UR5 agent should achieve a 90+% success rate in around 350 episodes. The following [video](https://www.youtube.com/watch?v=R86Vs9Vb6Bc) shows how a 3-layered agent performed after 450 episodes of training. In order to watch your trained agent, execute the command *"python3 initialize_HAC.py --test --show"*. To train agents in the inverted pendulum domain, swap the UR5 reacher *"design_agent_and_env.py"* file for an inverted pendulum *"design_agent_and_env.py"* file, which are located in the *"example_designs"* folder. To train agents in the ant reacher and ant four rooms environments, execute the command *"python3 initialize_HAC.py --retrain"* in the appropriate folder within the *ant_environments* directory. In the near future, the code for the ant domains will be integrated with the code for the other domains.
5 |
6 | Please note that in order to run this repository, you must have (i) a MuJoCo [license](https://www.roboti.us/license.html), (ii) the required MuJoCo software [libraries](https://www.roboti.us/index.html), and (iii) the MuJoCo Python [wrapper](https://github.com/openai/mujoco-py) from OpenAI.
7 |
8 | Happy to answer any questions you have. Please email me at andrew_levy2@brown.edu.
9 |
10 | ## UPDATE LOG
11 |
12 | ### 5/20/2020 - Key Changes
13 |
14 | 1. Added 2-level ant environments
15 | 2. Centralized exploration hyperparameters for ant environments in *design_agent_and_env.py* file
16 |
17 | ### 2/25/2020 - Key Changes
18 |
19 | 1. TensorFlow 2.x Compatible
20 |
21 | 2. Fine-tuned exploration parameters of the Ant Reacher environment
22 |
23 | ### 10/1/2019 - Key Changes
24 |
25 | 1. Added Ant Reacher and Ant Four Rooms Environments
26 |
27 | The code for the ant environments has been temporarily added to the *ant_environments* folder. In the near future, the code for the ant domains will be integrated with the code for the other domains. Only minimal changes to the code are needed to run the ant environments.
28 |
29 | ### 10/12/2018 - Key Changes
30 | 1. Bounded Q-Values
31 |
32 | The Q-values output by the critic network at each level are now bounded to *[-T,0]*, in which *T* is the max sequence length in which each policy specializes and is also the magnitude of the subgoal penalty. We use an upper bound of 0 because our code uses a nonpositive reward function, so Q-values should never be positive. However, we noticed that sometimes the critic function approximator would make small mistakes and assign positive Q-values, which occasionally proved harmful to results. In addition, we observed improved results when we used a tighter lower bound of *-T* (i.e., the subgoal penalty). The improvement may stem from the increased flexibility that the bounded Q-values provide the critic: the critic can assign a value of *-T* to any (state, action, goal) tuple in which the action does not bring the agent close to the goal, instead of having to learn the exact value.
33 |
34 | 2. Removed Target Networks
35 |
36 | We also noticed improved results when we used the regular Q-networks to determine the Bellman target updates (i.e., *reward + Q(next state,pi(next state),goal)*) instead of the separate target networks that are used in DDPG. The default setting of our code base thus no longer uses target networks. However, the target networks can be easily activated by making the changes specified in (i) the *"learn"* method in the *"layer.py"* file and (ii) the *"update"* method in the *"critic.py"* file.
37 |
38 | 3. Centralized Design Template
39 |
40 | Users can now configure the agent and environment in the single file, *"design_agent_and_env.py"*. This template file contains most of the significant hyperparameters in HAC. We have removed the command-line options that can change the architecture of the agent's hierarchy.
41 |
42 | 4. Added UR5 Reacher Environment
43 |
44 | We have added a new UR5 reacher environment, in which a UR5 agent can learn to achieve various poses. The *"ur5.xml"* MuJoCo file also contains commented code for a Robotiq gripper if you would like to augment the agent. Additional environments will hopefully be added shortly.
45 |
--------------------------------------------------------------------------------
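
The "Bounded Q-Values" note in the README above describes squashing critic outputs into *[-T, 0]*. Below is a minimal sketch of one way to achieve such a bound in the repository's TF1-compat style; the variable names, dimensions, and the choice of a scaled sigmoid are illustrative assumptions, not necessarily the exact construction used in critic.py.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

T = 10                                               # hypothetical max actions per level
sa_g = tf.placeholder(tf.float32, shape=(None, 8))   # e.g. concat(state, action, goal)
logits = tf.layers.dense(sa_g, 1)                    # unbounded raw critic output
q_bounded = -T * tf.sigmoid(logits)                  # squashed into the open interval (-T, 0)

Because the sigmoid saturates, the critic can cheaply push any clearly bad (state, action, goal) tuple toward the lower bound -T, which matches the flexibility argument made in the README.
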
/actor.py:
--------------------------------------------------------------------------------
1 | import tensorflow.compat.v1 as tf
2 | tf.disable_v2_behavior()
3 | import numpy as np
4 | from utils import layer
5 |
6 |
7 | class Actor():
8 |
9 | def __init__(self,
10 | sess,
11 | env,
12 | batch_size,
13 | layer_number,
14 | FLAGS,
15 | learning_rate=0.001,
16 | tau=0.05):
17 |
18 | self.sess = sess
19 |
20 | # Determine range of actor network outputs. This will be used to configure outer layer of neural network
21 | if layer_number == 0:
22 | self.action_space_bounds = env.action_bounds
23 | self.action_offset = env.action_offset
24 | else:
25 | # Determine symmetric range of subgoal space and offset
26 | self.action_space_bounds = env.subgoal_bounds_symmetric
27 | self.action_offset = env.subgoal_bounds_offset
28 |
29 | # Dimensions of action will depend on layer level
30 | if layer_number == 0:
31 | self.action_space_size = env.action_dim
32 | else:
33 | self.action_space_size = env.subgoal_dim
34 |
35 | self.actor_name = 'actor_' + str(layer_number)
36 |
37 | # Dimensions of goal placeholder will differ depending on layer level
38 | if layer_number == FLAGS.layers - 1:
39 | self.goal_dim = env.end_goal_dim
40 | else:
41 | self.goal_dim = env.subgoal_dim
42 |
43 | self.state_dim = env.state_dim
44 |
45 | self.learning_rate = learning_rate
46 | # self.exploration_policies = exploration_policies
47 | self.tau = tau
48 | self.batch_size = batch_size
49 |
50 | self.state_ph = tf.placeholder(tf.float32, shape=(None, self.state_dim))
51 | self.goal_ph = tf.placeholder(tf.float32, shape=(None, self.goal_dim))
52 | self.features_ph = tf.concat([self.state_ph, self.goal_ph], axis=1)
53 |
54 | # Create actor network
55 | self.infer = self.create_nn(self.features_ph)
56 |
57 | # Target network code "repurposed" from Patrick Emani :^)
58 | self.weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name]
59 | # self.num_weights = len(self.weights)
60 |
61 | # Create target actor network
62 | self.target = self.create_nn(self.features_ph, name = self.actor_name + '_target')
63 | self.target_weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name][len(self.weights):]
64 |
65 | self.update_target_weights = \
66 | [self.target_weights[i].assign(tf.multiply(self.weights[i], self.tau) +
67 | tf.multiply(self.target_weights[i], 1. - self.tau))
68 | for i in range(len(self.target_weights))]
69 |
70 | self.action_derivs = tf.placeholder(tf.float32, shape=(None, self.action_space_size))
71 | self.unnormalized_actor_gradients = tf.gradients(self.infer, self.weights, -self.action_derivs)
72 | self.policy_gradient = list(map(lambda x: tf.div(x, self.batch_size), self.unnormalized_actor_gradients))
73 |
74 | # self.policy_gradient = tf.gradients(self.infer, self.weights, -self.action_derivs)
75 | self.train = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(self.policy_gradient, self.weights))
76 |
77 |
78 | def get_action(self, state, goal):
79 | actions = self.sess.run(self.infer,
80 | feed_dict={
81 | self.state_ph: state,
82 | self.goal_ph: goal
83 | })
84 |
85 | return actions
86 |
87 | def get_target_action(self, state, goal):
88 | actions = self.sess.run(self.target,
89 | feed_dict={
90 | self.state_ph: state,
91 | self.goal_ph: goal
92 | })
93 |
94 | return actions
95 |
96 | def update(self, state, goal, action_derivs):
97 | weights, policy_grad, _ = self.sess.run([self.weights, self.policy_gradient, self.train],
98 | feed_dict={
99 | self.state_ph: state,
100 | self.goal_ph: goal,
101 | self.action_derivs: action_derivs
102 | })
103 |
104 | return len(weights)
105 |
106 | # self.sess.run(self.update_target_weights)
107 |
108 | # def create_nn(self, state, goal, name='actor'):
109 | def create_nn(self, features, name=None):
110 |
111 | if name is None:
112 | name = self.actor_name
113 |
114 | with tf.variable_scope(name + '_fc_1'):
115 | fc1 = layer(features, 64)
116 | with tf.variable_scope(name + '_fc_2'):
117 | fc2 = layer(fc1, 64)
118 | with tf.variable_scope(name + '_fc_3'):
119 | fc3 = layer(fc2, 64)
120 | with tf.variable_scope(name + '_fc_4'):
121 | fc4 = layer(fc3, self.action_space_size, is_output=True)
122 |
123 | output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset
124 |
125 | return output
126 |
127 |
128 |
--------------------------------------------------------------------------------
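
The Actor class above applies the deterministic policy gradient: the critic supplies dQ/da (the "action_derivs" placeholder), tf.gradients back-propagates it through the policy weights, the result is averaged over the batch, and Adam applies the negated gradients so that the update ascends Q. A self-contained toy sketch of the same pattern follows; the shapes, dimensions, and layer names are hypothetical and not taken from this repository.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

state = tf.placeholder(tf.float32, shape=(None, 4))      # toy state dimension
action = tf.layers.dense(state, 2, activation=tf.tanh)   # toy deterministic policy pi(s)
theta = tf.trainable_variables()

dq_da = tf.placeholder(tf.float32, shape=(None, 2))      # dQ/da supplied by a critic
batch_size = 64.0
grads = tf.gradients(action, theta, -dq_da)              # chain rule: -(dQ/da) * (da/dtheta)
grads = [g / batch_size for g in grads]                  # average over the batch
train_op = tf.train.AdamOptimizer(0.001).apply_gradients(zip(grads, theta))
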
/ant_environments/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/.DS_Store
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical Actor-Critic (HAC)
2 | This repository contains the code to implement the *Hierarchical Actor-Critic (HAC)* algorithm. HAC helps agents learn tasks more quickly by enabling them to break problems down into short sequences of actions.
3 |
4 | To run HAC, execute the command *"python3 initialize_HAC.py --retrain"*. By default, this will train a UR5 agent with a 3-level hierarchy to learn to achieve certain poses. This UR5 agent should achieve a 90+% success rate in around 350 episodes. The following [video](https://www.youtube.com/watch?v=R86Vs9Vb6Bc) shows how a 3-layered agent performed after 450 episodes of training. In order to watch your trained agent, execute the command *"python3 initialize_HAC.py --test --show"*. Please note that in order to run this repository, you must have (i) a MuJoCo [license](https://www.roboti.us/license.html), (ii) the required MuJoCo software [libraries](https://www.roboti.us/index.html), and (iii) the MuJoCo Python [wrapper](https://github.com/openai/mujoco-py) from OpenAI.
5 |
6 | To run HAC with your own agents and MuJoCo environments, you need to complete the template in the *"design_agent_and_env.py"* file. The *"example_designs"* folder contains other examples of design templates that build different agents in the UR5 reacher and inverted pendulum environments.
7 |
8 | Happy to answer any questions you have. Please email me at andrew_levy2@brown.edu.
9 |
10 | ## UPDATE LOG
11 |
12 | ### 10/12/2018 - Key Changes
13 | 1. Bounded Q-Values
14 |
15 | The Q-values output by the critic network at each level are now bounded to *[-T,0]*, in which *T* is the max sequence length in which each policy specializes and is also the magnitude of the subgoal penalty. We use an upper bound of 0 because our code uses a nonpositive reward function, so Q-values should never be positive. However, we noticed that sometimes the critic function approximator would make small mistakes and assign positive Q-values, which occasionally proved harmful to results. In addition, we observed improved results when we used a tighter lower bound of *-T* (i.e., the subgoal penalty). The improvement may stem from the increased flexibility that the bounded Q-values provide the critic: the critic can assign a value of *-T* to any (state, action, goal) tuple in which the action does not bring the agent close to the goal, instead of having to learn the exact value.
16 |
17 | 2. Removed Target Networks
18 |
19 | We also noticed improved results when we used the regular Q-networks to determine the Bellman target updates (i.e., *reward + Q(next state,pi(next state),goal)*) instead of the separate target networks that are used in DDPG. The default setting of our code base thus no longer uses target networks. However, the target networks can be easily activated by making the changes specified in (i) the *"learn"* method in the *"layer.py"* file and (ii) the *"update"* method in the *"critic.py"* file.
20 |
21 | 3. Centralized Design Template
22 |
23 | Users can now configure the agent and environment in the single file, *"design_agent_and_env.py"*. This template file contains most of the significant hyperparameters in HAC. We have removed the command-line options that can change the architecture of the agent's hierarchy.
24 |
25 | 4. Added UR5 Reacher Environment
26 |
27 | We have added a new UR5 reacher environment, in which a UR5 agent can learn to achieve various poses. The *"ur5.xml"* MuJoCo file also contains commented code for a Robotiq gripper if you would like to augment the agent. Additional environments will hopefully be added shortly.
28 |
--------------------------------------------------------------------------------
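
The "Removed Target Networks" note above states that Bellman targets are computed with the regular Q-network, i.e. *reward + Q(next state, pi(next state), goal)*, and the "Bounded Q-Values" note bounds values to *[-T, 0]*. A small NumPy sketch of such a target computation is shown below; the discount value, T, and the function name are illustrative assumptions, and the repository's actual update logic lives in critic.py and layer.py.

import numpy as np

def bellman_targets(rewards, q_next, is_terminal, gamma=0.98, T=10):
    """target = r if the transition is terminal, else r + gamma * Q(s', pi(s'), g),
    clipped into the bounded range [-T, 0]."""
    targets = np.where(is_terminal, rewards, rewards + gamma * q_next)
    return np.clip(targets, -T, 0.0)

rewards     = np.array([-1.0, -1.0, 0.0])
q_next      = np.array([-4.2, -9.5, 0.0])    # Q(s', pi(s'), g) from the regular Q-network
is_terminal = np.array([False, False, True])
print(bellman_targets(rewards, q_next, is_terminal))   # approx. [-5.116, -10.0, 0.0]
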
/ant_environments/ant_four_rooms_2_levels/actor.py:
--------------------------------------------------------------------------------
1 | import tensorflow.compat.v1 as tf
2 | tf.disable_v2_behavior()
3 | import numpy as np
4 | from utils import layer
5 |
6 |
7 | class Actor():
8 |
9 | def __init__(self,
10 | sess,
11 | env,
12 | batch_size,
13 | layer_number,
14 | FLAGS,
15 | learning_rate=0.001,
16 | tau=0.05):
17 |
18 | self.sess = sess
19 |
20 | # Determine range of actor network outputs. This will be used to configure outer layer of neural network
21 | if layer_number == 0:
22 | self.action_space_bounds = env.action_bounds
23 | self.action_offset = env.action_offset
24 | else:
25 | # Determine symmetric range of subgoal space and offset
26 | self.action_space_bounds = env.subgoal_bounds_symmetric
27 | self.action_offset = env.subgoal_bounds_offset
28 |
29 | # Dimensions of action will depend on layer level
30 | if layer_number == 0:
31 | self.action_space_size = env.action_dim
32 | else:
33 | self.action_space_size = env.subgoal_dim
34 |
35 | self.actor_name = 'actor_' + str(layer_number)
36 |
37 | # Dimensions of goal placeholder will differ depending on layer level
38 | if layer_number == FLAGS.layers - 1:
39 | self.goal_dim = env.end_goal_dim
40 | else:
41 | self.goal_dim = env.subgoal_dim
42 |
43 | self.state_dim = env.state_dim
44 |
45 | self.learning_rate = learning_rate
46 | # self.exploration_policies = exploration_policies
47 | self.tau = tau
48 | # self.batch_size = batch_size
49 | self.batch_size = tf.placeholder(tf.float32)
50 |
51 | self.state_ph = tf.placeholder(tf.float32, shape=(None, self.state_dim))
52 | self.goal_ph = tf.placeholder(tf.float32, shape=(None, self.goal_dim))
53 | self.features_ph = tf.concat([self.state_ph, self.goal_ph], axis=1)
54 |
55 | # Create actor network
56 | self.infer = self.create_nn(self.features_ph)
57 |
58 | # Target network code "repurposed" from Patrick Emani :^)
59 | self.weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name]
60 | # self.num_weights = len(self.weights)
61 |
62 | # Create target actor network
63 | self.target = self.create_nn(self.features_ph, name = self.actor_name + '_target')
64 | self.target_weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name][len(self.weights):]
65 |
66 | self.update_target_weights = \
67 | [self.target_weights[i].assign(tf.multiply(self.weights[i], self.tau) +
68 | tf.multiply(self.target_weights[i], 1. - self.tau))
69 | for i in range(len(self.target_weights))]
70 |
71 | self.action_derivs = tf.placeholder(tf.float32, shape=(None, self.action_space_size))
72 | self.unnormalized_actor_gradients = tf.gradients(self.infer, self.weights, -self.action_derivs)
73 | self.policy_gradient = list(map(lambda x: tf.div(x, self.batch_size), self.unnormalized_actor_gradients))
74 |
75 | # self.policy_gradient = tf.gradients(self.infer, self.weights, -self.action_derivs)
76 | self.train = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(self.policy_gradient, self.weights))
77 |
78 |
79 | def get_action(self, state, goal):
80 | actions = self.sess.run(self.infer,
81 | feed_dict={
82 | self.state_ph: state,
83 | self.goal_ph: goal
84 | })
85 |
86 | return actions
87 |
88 | def get_target_action(self, state, goal):
89 | actions = self.sess.run(self.target,
90 | feed_dict={
91 | self.state_ph: state,
92 | self.goal_ph: goal
93 | })
94 |
95 | return actions
96 |
97 | def update(self, state, goal, action_derivs, next_batch_size):
98 | weights, policy_grad, _ = self.sess.run([self.weights, self.policy_gradient, self.train],
99 | feed_dict={
100 | self.state_ph: state,
101 | self.goal_ph: goal,
102 | self.action_derivs: action_derivs,
103 | self.batch_size: next_batch_size
104 | })
105 |
106 | return len(weights)
107 |
108 | # self.sess.run(self.update_target_weights)
109 |
110 | # def create_nn(self, state, goal, name='actor'):
111 | def create_nn(self, features, name=None):
112 |
113 | if name is None:
114 | name = self.actor_name
115 |
116 | with tf.variable_scope(name + '_fc_1'):
117 | fc1 = layer(features, 64)
118 | with tf.variable_scope(name + '_fc_2'):
119 | fc2 = layer(fc1, 64)
120 | with tf.variable_scope(name + '_fc_3'):
121 | fc3 = layer(fc2, 64)
122 | with tf.variable_scope(name + '_fc_4'):
123 | fc4 = layer(fc3, self.action_space_size, is_output=True)
124 |
125 | output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset
126 |
127 | return output
128 |
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/experience_buffer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class ExperienceBuffer():
4 |
5 | def __init__(self, max_buffer_size, batch_size):
6 | self.size = 0
7 | self.max_buffer_size = max_buffer_size
8 | self.experiences = []
9 | self.batch_size = batch_size
10 |
11 | def add(self, experience):
12 | assert len(experience) == 7, 'Experience must be of form (s, a, r, s\', g, t, grip_info)'
13 | assert type(experience[5]) == bool
14 |
15 | self.experiences.append(experience)
16 | self.size += 1
17 |
18 | # If replay buffer is filled, remove a percentage of replay buffer. Only removing a single transition slows down performance
19 | if self.size >= self.max_buffer_size:
20 | beg_index = int(np.floor(self.max_buffer_size/6))
21 | self.experiences = self.experiences[beg_index:]
22 | self.size -= beg_index
23 |
24 | def get_batch(self):
25 | states, actions, rewards, new_states, goals, is_terminals = [], [], [], [], [], []
26 | dist = np.random.randint(0, high=self.size, size=min(self.size, self.batch_size))
27 |
28 | for i in dist:
29 | states.append(self.experiences[i][0])
30 | actions.append(self.experiences[i][1])
31 | rewards.append(self.experiences[i][2])
32 | new_states.append(self.experiences[i][3])
33 | goals.append(self.experiences[i][4])
34 | is_terminals.append(self.experiences[i][5])
35 |
36 | return states, actions, rewards, new_states, goals, is_terminals
37 |
--------------------------------------------------------------------------------
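
A short usage sketch for the ExperienceBuffer above: transitions are 7-tuples of the form (state, action, reward, next_state, goal, is_terminal, grip_info). The dimensions and the None grip_info placeholder below are illustrative; in the repository, real transitions are created by the agent/layer code.

import numpy as np
from experience_buffer import ExperienceBuffer

buffer = ExperienceBuffer(max_buffer_size=10000, batch_size=256)

s, a, g = np.zeros(3), np.zeros(2), np.zeros(3)   # hypothetical state/action/goal dims
buffer.add((s, a, -1.0, s, g, False, None))       # is_terminal (index 5) must be a bool

states, actions, rewards, next_states, goals, terminals = buffer.get_batch()
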
/ant_environments/ant_four_rooms_2_levels/initialize_HAC.py:
--------------------------------------------------------------------------------
1 | """
2 | This is the starting file for the Hierarchical Actor-Critic (HAC) algorithm. The script below processes the command-line options specified
3 | by the user and instantiates the environment and agent.
4 | """
5 |
6 | from design_agent_and_env import design_agent_and_env
7 | from options import parse_options
8 | from agent import Agent
9 | from run_HAC import run_HAC
10 |
11 | # Determine training options specified by user. The full list of available options can be found in "options.py" file.
12 | FLAGS = parse_options()
13 |
14 | # Instantiate the agent and Mujoco environment. The designer must assign values to the hyperparameters listed in the "design_agent_and_env.py" file.
15 | agent, env = design_agent_and_env(FLAGS)
16 |
17 | # Begin training
18 | run_HAC(FLAGS,env,agent)
19 |
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.data-00000-of-00001
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.index
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.meta
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/models/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "/home/tester/Documents/HAC/extra_copy/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_four_rooms_2_levels_2/models/HAC.ckpt-99"
2 | all_model_checkpoint_paths: "/home/tester/Documents/HAC/extra_copy/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_four_rooms_2_levels_2/models/HAC.ckpt-99"
3 |
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/common/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The dm_control Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 |
16 | """Functions to manage the common assets for domains."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import os
23 | from dm_control.utils import resources
24 |
25 | _SUITE_DIR = os.path.dirname(os.path.dirname(__file__))
26 | _FILENAMES = [
27 | "common/materials.xml",
28 | "common/skybox.xml",
29 | "common/visual.xml",
30 | ]
31 |
32 | ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename))
33 | for filename in _FILENAMES}
34 |
35 |
36 | def read_model(model_filename):
37 | """Reads a model XML file and returns its contents as a string."""
38 | return resources.GetResource(os.path.join(_SUITE_DIR, model_filename))
39 |
--------------------------------------------------------------------------------
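
A usage sketch for the asset helpers above, assuming dm_control is installed and the mujoco_files directory is on the import path so the package can be imported as "common":

from common import ASSETS, read_model

xml_string = read_model('ant_reacher.xml')   # path resolved relative to mujoco_files/
print(sorted(ASSETS))                        # ['common/materials.xml', 'common/skybox.xml', 'common/visual.xml']
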
/ant_environments/ant_four_rooms_2_levels/mujoco_files/common/materials.xml:
--------------------------------------------------------------------------------
(XML markup not preserved in this snapshot; refer to the original file in the repository.)
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/common/skybox.xml:
--------------------------------------------------------------------------------
(XML markup not preserved in this snapshot; refer to the original file in the repository.)
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/common/visual.xml:
--------------------------------------------------------------------------------
(XML markup not preserved in this snapshot; refer to the original file in the repository.)
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_base.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_forearm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_forearm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_shoulder.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_shoulder.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_upperarm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_upperarm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist1.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup_2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup_3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_finger_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_finger_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/new_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/new_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_finger_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_finger_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/red_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/red_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/upd_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/upd_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_base.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_forearm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_forearm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_shoulder.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_shoulder.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_upperarm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_upperarm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist1.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/mujoco_files/pendulum.xml:
--------------------------------------------------------------------------------
(XML markup not preserved in this snapshot; refer to the original file in the repository.)
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/options.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | """
4 | Below are training options user can specify in command line.
5 |
6 | Options Include:
7 |
8 | 1. Retrain boolean ("--retrain")
9 | - If included, actor and critic neural network parameters are reset
10 |
11 | 2. Testing boolean ("--test")
12 | - If included, agent only uses greedy policy without noise. No changes are made to policy and neural networks.
13 | - If not included, periods of training are by default interleaved with periods of testing to evaluate progress.
14 |
15 | 3. Show boolean ("--show")
16 | - If included, training will be visualized
17 |
18 | 4. Train Only boolean ("--train_only")
19 | - If included, agent will be solely in training mode and will not interleave periods of training and testing
20 |
21 | 5. Verbosity boolean ("--verbose")
22 | - If included, summary of each transition will be printed
23 |
24 | 6. All Trans boolean ("--all_trans")
25 | - If included, all transitions including (i) hindsight action, (ii) subgoal penalty, (iii) preliminary HER, and (iv) final HER transitions will be printed. Use below options to print out specific types of transitions.
26 |
27 | 7. Hindsight Action trans boolean ("--hind_action")
28 | - If included, prints hindsight action transitions for each level
29 |
30 | 8. Subgoal Penalty trans ("--penalty")
31 | - If included, prints the subgoal penalty transitions
32 |
33 | 9. Preliminary HER trans ("--prelim_HER")
34 | - If included, prints the preliminary HER transitions (i.e., with TBD reward and goal components)
35 |
36 | 10. HER trans ("--HER")
37 | - If included, prints the final HER transitions for each level
38 |
39 | 11. Show Q-values ("--Q_values")
40 | - Show Q-values for each action by each level
41 |
42 | """
43 |
44 | def parse_options():
45 | parser = argparse.ArgumentParser()
46 |
47 | parser.add_argument(
48 | '--retrain',
49 | action='store_true',
50 | help='Include to reset policy'
51 | )
52 |
53 | parser.add_argument(
54 | '--test',
55 | action='store_true',
56 | help='Include to fix current policy'
57 | )
58 |
59 | parser.add_argument(
60 | '--show',
61 | action='store_true',
62 | help='Include to visualize training'
63 | )
64 |
65 | parser.add_argument(
66 | '--train_only',
67 | action='store_true',
68 | help='Include to use training mode only'
69 | )
70 |
71 | parser.add_argument(
72 | '--verbose',
73 | action='store_true',
74 | help='Print summary of each transition'
75 | )
76 |
77 | parser.add_argument(
78 | '--all_trans',
79 | action='store_true',
80 | help='Print all transition types (hindsight action, subgoal penalty, preliminary HER, and final HER)'
81 | )
82 |
83 | parser.add_argument(
84 | '--hind_action',
85 | action='store_true',
86 | help='Print hindsight action transitions for each level'
87 | )
88 |
89 | parser.add_argument(
90 | '--penalty',
91 | action='store_true',
92 | help='Print subgoal penalty transitions'
93 | )
94 |
95 | parser.add_argument(
96 | '--prelim_HER',
97 | action='store_true',
98 | help='Print preliminary HER transitions'
99 | )
100 |
101 | parser.add_argument(
102 | '--HER',
103 | action='store_true',
104 | help='Print final HER transitions for each level'
105 | )
106 |
107 | parser.add_argument(
108 | '--Q_values',
109 | action='store_true',
110 | help='Show Q-values for each action at each level'
111 | )
112 |
113 | FLAGS, unparsed = parser.parse_known_args()
114 |
115 |
116 | return FLAGS
117 |
--------------------------------------------------------------------------------
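
A usage sketch for parse_options() above; it is normally invoked indirectly by running initialize_HAC.py (e.g. "python3 initialize_HAC.py --retrain --verbose"), and the printed messages below are illustrative only.

from options import parse_options

FLAGS = parse_options()
if FLAGS.retrain:
    print("Resetting actor and critic parameters before training")
if FLAGS.test:
    print("Evaluating the current policy greedily (no exploration noise)")
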
/ant_environments/ant_four_rooms_2_levels/performance_log.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/performance_log.p
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/run_HAC.py:
--------------------------------------------------------------------------------
1 | """
2 | "run_HAC.py" executes the training schedule for the agent. By default, the agent will alternate between exploration and testing phases. The number of episodes in the exploration phase can be configured in section 3 of "design_agent_and_env.py" file. If the user prefers to only explore or only test, the user can enter the command-line options ""--train_only" or "--test", respectively. The full list of command-line options is available in the "options.py" file.
3 | """
4 |
5 | import pickle as cpickle
6 | import agent as Agent
7 | from utils import print_summary
8 |
9 | NUM_BATCH = 1000
10 | TEST_FREQ = 2
11 |
12 | num_test_episodes = 100
13 |
14 | def run_HAC(FLAGS,env,agent):
15 |
16 | # Print task summary
17 | print_summary(FLAGS,env)
18 |
19 | total_episodes = 0
20 |
21 | # Determine training mode. If not testing and not solely training, interleave training and testing to track progress
22 | mix_train_test = False
23 | if not FLAGS.test and not FLAGS.train_only:
24 | mix_train_test = True
25 |
26 | for batch in range(NUM_BATCH):
27 |
28 | num_episodes = agent.other_params["num_exploration_episodes"]
29 |
30 | # Evaluate policy every TEST_FREQ batches if interleaving training and testing
31 | if mix_train_test and batch % TEST_FREQ == 0:
32 | print("\n--- TESTING ---")
33 | agent.FLAGS.test = True
34 | num_episodes = num_test_episodes
35 |
36 | # Reset successful episode counter
37 | successful_episodes = 0
38 |
39 | for episode in range(num_episodes):
40 |
41 | print("\nBatch %d, Episode %d" % (batch, episode))
42 |
43 | # Train for an episode
44 | success = agent.train(env, episode, total_episodes)
45 |
46 | if success:
47 | print("Batch %d, Episode %d End Goal Achieved\n" % (batch, episode))
48 |
49 | # Increment successful episode counter if applicable
50 | if mix_train_test and batch % TEST_FREQ == 0:
51 | successful_episodes += 1
52 |
53 | if FLAGS.train_only or (mix_train_test and batch % TEST_FREQ != 0):
54 | total_episodes += 1
55 |
56 | # Save agent
57 | agent.save_model(episode)
58 |
59 | # Finish evaluating policy if tested prior batch
60 | if mix_train_test and batch % TEST_FREQ == 0:
61 |
62 | # Log performance
63 | success_rate = successful_episodes / num_test_episodes * 100
64 | print("\nTesting Success Rate %.2f%%" % success_rate)
65 | agent.log_performance(success_rate)
66 | agent.FLAGS.test = False
67 |
68 | print("\n--- END TESTING ---\n")
69 |
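Given the constants above (TEST_FREQ = 2), the interleaved schedule evaluates the policy on every even-numbered batch and explores on the others. A quick standalone check of that scheduling logic, illustrative only and not part of "run_HAC.py":

```python
TEST_FREQ = 2   # matches the constant above

# Batches 0, 2, 4, ... are evaluation batches; 1, 3, 5, ... are exploration batches.
for batch in range(6):
    phase = "test" if batch % TEST_FREQ == 0 else "explore"
    print(batch, phase)
```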
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_2_levels/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow.compat.v1 as tf
2 | tf.disable_v2_behavior()
3 |
4 | def layer(input_layer, num_next_neurons, is_output=False):
5 | num_prev_neurons = int(input_layer.shape[1])
6 | shape = [num_prev_neurons, num_next_neurons]
7 |
8 | if is_output:
9 | weight_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
10 | bias_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
11 | else:
12 | # 1/sqrt(f)
13 | fan_in_init = 1 / num_prev_neurons ** 0.5
14 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
15 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
16 |
17 | weights = tf.get_variable("weights", shape, initializer=weight_init)
18 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
19 |
20 | dot = tf.matmul(input_layer, weights) + biases
21 |
22 | if is_output:
23 | return dot
24 |
25 | relu = tf.nn.relu(dot)
26 | return relu
27 |
28 | def layer_goal_nn(input_layer, num_next_neurons, is_output=False):
29 | num_prev_neurons = int(input_layer.shape[1])
30 | shape = [num_prev_neurons, num_next_neurons]
31 |
32 |
33 | fan_in_init = 1 / num_prev_neurons ** 0.5
34 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
35 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
36 |
37 | weights = tf.get_variable("weights", shape, initializer=weight_init)
38 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
39 |
40 | dot = tf.matmul(input_layer, weights) + biases
41 |
42 | if is_output:
43 | return dot
44 |
45 | relu = tf.nn.relu(dot)
46 | return relu
47 |
48 |
49 | # Below function prints out options and environment specified by user
50 | def print_summary(FLAGS,env):
51 |
52 | print("\n- - - - - - - - - - -")
53 | print("Task Summary: ","\n")
54 | print("Environment: ", env.name)
55 | print("Number of Layers: ", FLAGS.layers)
56 | print("Time Limit per Layer: ", FLAGS.time_scale)
57 | print("Max Episode Time Steps: ", env.max_actions)
58 | print("Retrain: ", FLAGS.retrain)
59 | print("Test: ", FLAGS.test)
60 | print("Visualize: ", FLAGS.show)
61 | print("- - - - - - - - - - -", "\n\n")
62 |
63 |
64 | # Below function ensures environment configurations were properly entered
65 | def check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds, initial_state_space, subgoal_bounds, subgoal_thresholds, max_actions, timesteps_per_action):
66 |
67 | # Ensure model file is an ".xml" file
68 | assert model_name[-4:] == ".xml", "Mujoco model must be an \".xml\" file"
69 |
70 | # Ensure upper bound of each range is >= its lower bound
71 | if goal_space_train is not None:
72 | for i in range(len(goal_space_train)):
73 | assert goal_space_train[i][1] >= goal_space_train[i][0], "In the training goal space, upper bound must be >= lower bound"
74 |
75 | if goal_space_test is not None:
76 | for i in range(len(goal_space_test)):
77 | assert goal_space_test[i][1] >= goal_space_test[i][0], "In the testing goal space, upper bound must be >= lower bound"
78 |
79 | for i in range(len(initial_state_space)):
80 | assert initial_state_space[i][1] >= initial_state_space[i][0], "In initial state space, upper bound must be >= lower bound"
81 |
82 | for i in range(len(subgoal_bounds)):
83 | assert subgoal_bounds[i][1] >= subgoal_bounds[i][0], "In subgoal space, upper bound must be >= lower bound"
84 |
85 | # Make sure end goal spaces and thresholds have same first dimension
86 | if goal_space_train is not None and goal_space_test is not None:
87 | assert len(goal_space_train) == len(goal_space_test) == len(end_goal_thresholds), "End goal space and thresholds must have same first dimension"
88 |
89 | # Make sure subgoal spaces and thresholds have same dimensions
90 | assert len(subgoal_bounds) == len(subgoal_thresholds), "Subgoal space and thresholds must have same first dimension"
91 |
92 | # Ensure max_actions and timesteps_per_action are positive integers
93 | assert max_actions > 0, "Max actions should be a positive integer"
94 |
95 | assert timesteps_per_action > 0, "Timesteps per action should be a positive integer"
96 |
97 |
98 |
99 |
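As a hypothetical example of how check_validity could be called (the values below are made up for illustration; the real configuration lives in "design_agent_and_env.py"):

```python
from utils import check_validity

# Made-up configuration values, purely to illustrate the expected argument shapes.
check_validity(
    model_name="ant_four_rooms.xml",
    goal_space_train=[[-6, 6], [-6, 6]],
    goal_space_test=[[-6, 6], [-6, 6]],
    end_goal_thresholds=[0.4, 0.4],
    initial_state_space=[[-1, 1], [-1, 1]],
    subgoal_bounds=[[-8, 8], [-8, 8]],
    subgoal_thresholds=[0.4, 0.4],
    max_actions=700,
    timesteps_per_action=15,
)
# A violated constraint raises an AssertionError with one of the messages above.
```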
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical Actor-Critic (HAC)
2 | This repository contains the code to implement the *Hierarchical Actor-Critic (HAC)* algorithm. HAC helps agents learn tasks more quickly by enabling them to break problems down into short sequences of actions.
3 |
4 | To run HAC, execute the command *"python3 initialize_HAC.py --retrain"*. By default, this will train a UR5 agent with a 3-level hierarchy to learn to achieve certain poses. This UR5 agent should achieve a 90+% success rate in around 350 episodes. The following [video](https://www.youtube.com/watch?v=R86Vs9Vb6Bc) shows how a 3-layered agent performed after 450 episodes of training. In order to watch your trained agent, execute the command *"python3 initialize_HAC.py --test --show"*. Please note that in order to run this repository, you must have (i) a MuJoCo [license](https://www.roboti.us/license.html), (ii) the required MuJoCo software [libraries](https://www.roboti.us/index.html), and (iii) the MuJoCo Python [wrapper](https://github.com/openai/mujoco-py) from OpenAI.
5 |
6 | To run HAC with your own agents and MuJoCo environments, you need to complete the template in the *"design_agent_and_env.py"* file. The *"example_designs"* folder contains other examples of design templates that build different agents in the UR5 reacher and inverted pendulum environments.
7 |
8 | Happy to answer any questions you have. Please email me at andrew_levy2@brown.edu.
9 |
10 | ## UPDATE LOG
11 |
12 | ### 10/12/2018 - Key Changes
13 | 1. Bounded Q-Values
14 |
15 | The Q-values output by the critic network at each level are now bounded between *[-T,0]*, in which *T* is the max sequence length in which each policy specializes and is also the negative of the subgoal penalty. We use an upper bound of 0 because our code uses a nonpositive reward function, so Q-values should never be positive. However, we noticed that sometimes the critic function approximator would make small mistakes and assign positive Q-values, which occasionally proved harmful to results. In addition, we observed improved results when we used a tighter lower bound of *-T* (i.e., the subgoal penalty). The improvement may come from the added flexibility the bounded Q-values give the critic: the critic can assign a value of *-T* to any (state, action, goal) tuple in which the action does not bring the agent close to the goal, instead of having to learn the exact value. (A minimal sketch of this bounding appears at the end of this update log.)
16 |
17 | 2. Removed Target Networks
18 |
19 | We also noticed improved results when we used the regular Q-networks to determine the Bellman target updates (i.e., *reward + Q(next state,pi(next state),goal)*) instead of the separate target networks that are used in DDPG. The default setting of our code base thus no longer uses target networks. However, the target networks can be easily activated by making the changes specified in (i) the *"learn"* method in the *"layer.py"* file and (ii) the *"update"* method in the *"critic.py"* file.
20 |
21 | 3. Centralized Design Template
22 |
23 | Users can now configure the agent and environment in the single file, *"design_agent_and_env.py"*. This template file contains most of the significant hyperparameters in HAC. We have removed the command-line options that can change the architecture of the agent's hierarchy.
24 |
25 | 4. Added UR5 Reacher Environment
26 |
27 | We have added a new UR5 reacher environment, in which a UR5 agent can learn to achieve various poses. The *"ur5.xml"* MuJoCo file also contains commented code for a Robotiq gripper if you would like to augment the agent. Additional environments will hopefully be added shortly.
28 |
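Below is a minimal TensorFlow 1.x sketch of one way to realize the *[-T, 0]* bound described in item 1 above. It is illustrative only and not necessarily identical to the implementation in *"critic.py"*; the value of *T* and the placeholders are assumptions made for the example.

```python
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

T = 10  # assumed max sequence length per level (FLAGS.time_scale in this code base)

# Raw critic output before bounding.
q_logits = tf.placeholder(tf.float32, shape=(None, 1))

# Sigmoid maps logits to (0, 1); scaling by -T squashes Q-values into (-T, 0).
q_bounded = -T * tf.sigmoid(q_logits)

# Bellman targets (reward + Q(s', pi(s'), g)) can be clipped to the same range so that
# bootstrapped values also respect the bound.
reward = tf.placeholder(tf.float32, shape=(None, 1))
q_next = tf.placeholder(tf.float32, shape=(None, 1))
target = tf.clip_by_value(reward + q_next, -float(T), 0.0)
```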
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/actor.py:
--------------------------------------------------------------------------------
1 | import tensorflow.compat.v1 as tf
2 | tf.disable_v2_behavior()
3 | import numpy as np
4 | from utils import layer
5 |
6 |
7 | class Actor():
8 |
9 | def __init__(self,
10 | sess,
11 | env,
12 | batch_size,
13 | layer_number,
14 | FLAGS,
15 | learning_rate=0.001,
16 | tau=0.05):
17 |
18 | self.sess = sess
19 |
20 | # Determine range of actor network outputs. This will be used to configure outer layer of neural network
21 | if layer_number == 0:
22 | self.action_space_bounds = env.action_bounds
23 | self.action_offset = env.action_offset
24 | else:
25 | # Determine symmetric range of subgoal space and offset
26 | self.action_space_bounds = env.subgoal_bounds_symmetric
27 | self.action_offset = env.subgoal_bounds_offset
28 |
29 | # Dimensions of action will depend on layer level
30 | if layer_number == 0:
31 | self.action_space_size = env.action_dim
32 | else:
33 | self.action_space_size = env.subgoal_dim
34 |
35 | self.actor_name = 'actor_' + str(layer_number)
36 |
37 | # Dimensions of goal placeholder will differ depending on layer level
38 | if layer_number == FLAGS.layers - 1:
39 | self.goal_dim = env.end_goal_dim
40 | else:
41 | self.goal_dim = env.subgoal_dim
42 |
43 | self.state_dim = env.state_dim
44 |
45 | self.learning_rate = learning_rate
46 | # self.exploration_policies = exploration_policies
47 | self.tau = tau
48 | # self.batch_size = batch_size
49 | self.batch_size = tf.placeholder(tf.float32)
50 |
51 | self.state_ph = tf.placeholder(tf.float32, shape=(None, self.state_dim))
52 | self.goal_ph = tf.placeholder(tf.float32, shape=(None, self.goal_dim))
53 | self.features_ph = tf.concat([self.state_ph, self.goal_ph], axis=1)
54 |
55 | # Create actor network
56 | self.infer = self.create_nn(self.features_ph)
57 |
58 | # Target network code "repurposed" from Patrick Emani :^)
59 | self.weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name]
60 | # self.num_weights = len(self.weights)
61 |
62 | # Create target actor network
63 | self.target = self.create_nn(self.features_ph, name = self.actor_name + '_target')
64 | self.target_weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name][len(self.weights):]
65 |
66 | self.update_target_weights = \
67 | [self.target_weights[i].assign(tf.multiply(self.weights[i], self.tau) +
68 | tf.multiply(self.target_weights[i], 1. - self.tau))
69 | for i in range(len(self.target_weights))]
70 |
71 | self.action_derivs = tf.placeholder(tf.float32, shape=(None, self.action_space_size))
72 | self.unnormalized_actor_gradients = tf.gradients(self.infer, self.weights, -self.action_derivs)
73 | self.policy_gradient = list(map(lambda x: tf.div(x, self.batch_size), self.unnormalized_actor_gradients))
74 |
75 | # self.policy_gradient = tf.gradients(self.infer, self.weights, -self.action_derivs)
76 | self.train = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(self.policy_gradient, self.weights))
77 |
78 |
79 | def get_action(self, state, goal):
80 | actions = self.sess.run(self.infer,
81 | feed_dict={
82 | self.state_ph: state,
83 | self.goal_ph: goal
84 | })
85 |
86 | return actions
87 |
88 | def get_target_action(self, state, goal):
89 | actions = self.sess.run(self.target,
90 | feed_dict={
91 | self.state_ph: state,
92 | self.goal_ph: goal
93 | })
94 |
95 | return actions
96 |
97 | def update(self, state, goal, action_derivs, next_batch_size):
98 | weights, policy_grad, _ = self.sess.run([self.weights, self.policy_gradient, self.train],
99 | feed_dict={
100 | self.state_ph: state,
101 | self.goal_ph: goal,
102 | self.action_derivs: action_derivs,
103 | self.batch_size: next_batch_size
104 | })
105 |
106 | return len(weights)
107 |
108 | # self.sess.run(self.update_target_weights)
109 |
110 | # def create_nn(self, state, goal, name='actor'):
111 | def create_nn(self, features, name=None):
112 |
113 | if name is None:
114 | name = self.actor_name
115 |
116 | with tf.variable_scope(name + '_fc_1'):
117 | fc1 = layer(features, 64)
118 | with tf.variable_scope(name + '_fc_2'):
119 | fc2 = layer(fc1, 64)
120 | with tf.variable_scope(name + '_fc_3'):
121 | fc3 = layer(fc2, 64)
122 | with tf.variable_scope(name + '_fc_4'):
123 | fc4 = layer(fc3, self.action_space_size, is_output=True)
124 |
125 | output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset
126 |
127 | return output
128 |
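For clarity, here is a tiny NumPy check of the final tanh scaling in create_nn above, using made-up bounds (in the real code these come from env.action_bounds/env.action_offset or their subgoal equivalents):

```python
import numpy as np

# Made-up example: one action dimension whose valid range is [0.0, 4.0].
action_space_bounds = np.array([2.0])   # half-width of the range
action_offset = np.array([2.0])         # midpoint of the range

pre_activations = np.array([-10.0, 0.0, 10.0])   # network outputs before tanh
actions = np.tanh(pre_activations) * action_space_bounds + action_offset
print(actions)   # approximately [0.0, 2.0, 4.0]; outputs always stay inside the range
```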
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/experience_buffer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class ExperienceBuffer():
4 |
5 | def __init__(self, max_buffer_size, batch_size):
6 | self.size = 0
7 | self.max_buffer_size = max_buffer_size
8 | self.experiences = []
9 | self.batch_size = batch_size
10 |
11 | def add(self, experience):
12 | assert len(experience) == 7, 'Experience must be of form (s, a, r, s\', g, t, grip_info)'
13 | assert type(experience[5]) == bool
14 |
15 | self.experiences.append(experience)
16 | self.size += 1
17 |
18 | # If the replay buffer is full, remove a chunk of the oldest transitions; removing only a single transition at a time slows performance
19 | if self.size >= self.max_buffer_size:
20 | beg_index = int(np.floor(self.max_buffer_size/6))
21 | self.experiences = self.experiences[beg_index:]
22 | self.size -= beg_index
23 |
24 | def get_batch(self):
25 | states, actions, rewards, new_states, goals, is_terminals = [], [], [], [], [], []
26 | dist = np.random.randint(0, high=self.size, size=min(self.size, self.batch_size))
27 |
28 | for i in dist:
29 | states.append(self.experiences[i][0])
30 | actions.append(self.experiences[i][1])
31 | rewards.append(self.experiences[i][2])
32 | new_states.append(self.experiences[i][3])
33 | goals.append(self.experiences[i][4])
34 | is_terminals.append(self.experiences[i][5])
35 |
36 | return states, actions, rewards, new_states, goals, is_terminals
37 |
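A minimal usage sketch of the buffer above; the tuple layout follows the assert in add, and the placeholder values are assumptions for illustration:

```python
import numpy as np
from experience_buffer import ExperienceBuffer

buffer = ExperienceBuffer(max_buffer_size=10000, batch_size=4)

# (state, action, reward, next_state, goal, is_terminal, grip_info)
for _ in range(8):
    buffer.add((np.zeros(3), np.zeros(2), -1.0, np.zeros(3), np.zeros(2), False, None))

states, actions, rewards, next_states, goals, is_terminals = buffer.get_batch()
print(len(states))   # min(size, batch_size) == 4 transitions sampled uniformly at random
```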
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/initialize_HAC.py:
--------------------------------------------------------------------------------
1 | """
2 | This is the starting file for the Hierarchical Actor-Critic (HAC) algorithm. The below script processes the command-line options specified
3 | by the user and instantiates the environment and agent.
4 | """
5 |
6 | from design_agent_and_env import design_agent_and_env
7 | from options import parse_options
8 | from agent import Agent
9 | from run_HAC import run_HAC
10 |
11 | # Determine training options specified by user. The full list of available options can be found in "options.py" file.
12 | FLAGS = parse_options()
13 |
14 | # Instantiate the agent and Mujoco environment. The designer must assign values to the hyperparameters listed in the "design_agent_and_env.py" file.
15 | agent, env = design_agent_and_env(FLAGS)
16 |
17 | # Begin training
18 | run_HAC(FLAGS,env,agent)
19 |
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/common/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The dm_control Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 |
16 | """Functions to manage the common assets for domains."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import os
23 | from dm_control.utils import resources
24 |
25 | _SUITE_DIR = os.path.dirname(os.path.dirname(__file__))
26 | _FILENAMES = [
27 | "common/materials.xml",
28 | "common/skybox.xml",
29 | "common/visual.xml",
30 | ]
31 |
32 | ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename))
33 | for filename in _FILENAMES}
34 |
35 |
36 | def read_model(model_filename):
37 | """Reads a model XML file and returns its contents as a string."""
38 | return resources.GetResource(os.path.join(_SUITE_DIR, model_filename))
39 |
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/common/materials.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/common/materials.xml
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/common/skybox.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/common/skybox.xml
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/common/visual.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/common/visual.xml
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_base.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_forearm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_forearm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_shoulder.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_shoulder.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_upperarm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_upperarm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist1.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup_2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup_3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_finger_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_finger_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/new_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/new_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_finger_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_finger_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/red_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/red_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/upd_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/upd_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_base.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_forearm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_forearm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_shoulder.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_shoulder.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_upperarm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_upperarm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist1.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/mujoco_files/pendulum.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/pendulum.xml
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/options.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | """
4 | Below are training options user can specify in command line.
5 |
6 | Options Include:
7 |
8 | 1. Retrain boolean ("--retrain")
9 | - If included, actor and critic neural network parameters are reset
10 |
11 | 2. Testing boolean ("--test")
12 | - If included, agent only uses greedy policy without noise. No changes are made to policy and neural networks.
13 | - If not included, periods of training are by default interleaved with periods of testing to evaluate progress.
14 |
15 | 3. Show boolean ("--show")
16 | - If included, training will be visualized
17 |
18 | 4. Train Only boolean ("--train_only")
19 | - If included, agent will be solely in training mode and will not interleave periods of training and testing
20 |
21 | 5. Verbosity boolean ("--verbose")
22 | - If included, summary of each transition will be printed
23 |
24 | 6. All Trans boolean ("--all_trans")
25 | - If included, all transitions including (i) hindsight action, (ii) subgoal penalty, (iii) preliminary HER, and (iv) final HER transitions will be printed. Use below options to print out specific types of transitions.
26 |
27 | 7. Hindsight Action trans boolean ("--hind_action")
28 | - If included, prints hindsight action transitions for each level
29 |
30 | 8. Subgoal Penalty trans boolean ("--penalty")
31 | - If included, prints the subgoal penalty transitions
32 |
33 | 9. Preliminary HER trans boolean ("--prelim_HER")
34 | - If included, prints the preliminary HER transitions (i.e., with TBD reward and goal components)
35 |
36 | 10. HER trans boolean ("--HER")
37 | - If included, prints the final HER transitions for each level
38 |
39 | 11. Show Q-values ("--Q_values")
40 | - Show Q-values for each action by each level
41 |
42 | """
43 |
44 | def parse_options():
45 | parser = argparse.ArgumentParser()
46 |
47 | parser.add_argument(
48 | '--retrain',
49 | action='store_true',
50 | help='Include to reset policy'
51 | )
52 |
53 | parser.add_argument(
54 | '--test',
55 | action='store_true',
56 | help='Include to fix current policy'
57 | )
58 |
59 | parser.add_argument(
60 | '--show',
61 | action='store_true',
62 | help='Include to visualize training'
63 | )
64 |
65 | parser.add_argument(
66 | '--train_only',
67 | action='store_true',
68 | help='Include to use training mode only'
69 | )
70 |
71 | parser.add_argument(
72 | '--verbose',
73 | action='store_true',
74 | help='Print summary of each transition'
75 | )
76 |
77 | parser.add_argument(
78 | '--all_trans',
79 | action='store_true',
80 | help='Print all transition types (hindsight action, subgoal penalty, preliminary HER, final HER)'
81 | )
82 |
83 | parser.add_argument(
84 | '--hind_action',
85 | action='store_true',
86 | help='Print hindsight action transitions for each level'
87 | )
88 |
89 | parser.add_argument(
90 | '--penalty',
91 | action='store_true',
92 | help='Print subgoal penalty transitions'
93 | )
94 |
95 | parser.add_argument(
96 | '--prelim_HER',
97 | action='store_true',
98 | help='Print preliminary HER transitions'
99 | )
100 |
101 | parser.add_argument(
102 | '--HER',
103 | action='store_true',
104 | help='Print final HER transitions for each level'
105 | )
106 |
107 | parser.add_argument(
108 | '--Q_values',
109 | action='store_true',
110 | help='Print Q-values for each action by each level'
111 | )
112 |
113 | FLAGS, unparsed = parser.parse_known_args()
114 |
115 |
116 | return FLAGS
117 |
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/run_HAC.py:
--------------------------------------------------------------------------------
1 | """
2 | "run_HAC.py" executes the training schedule for the agent. By default, the agent will alternate between exploration and testing phases. The number of episodes in the exploration phase can be configured in section 3 of "design_agent_and_env.py" file. If the user prefers to only explore or only test, the user can enter the command-line options ""--train_only" or "--test", respectively. The full list of command-line options is available in the "options.py" file.
3 | """
4 |
5 | import pickle as cpickle
6 | import agent as Agent
7 | from utils import print_summary
8 |
9 | NUM_BATCH = 1000
10 | TEST_FREQ = 2
11 |
12 | num_test_episodes = 100
13 |
14 | def run_HAC(FLAGS,env,agent):
15 |
16 | # Print task summary
17 | print_summary(FLAGS,env)
18 |
19 | total_episodes = 0
20 |
21 | # Determine training mode. If not testing and not solely training, interleave training and testing to track progress
22 | mix_train_test = False
23 | if not FLAGS.test and not FLAGS.train_only:
24 | mix_train_test = True
25 |
26 | for batch in range(NUM_BATCH):
27 |
28 | num_episodes = agent.other_params["num_exploration_episodes"]
29 |
30 | # Evaluate policy every TEST_FREQ batches if interleaving training and testing
31 | if mix_train_test and batch % TEST_FREQ == 0:
32 | print("\n--- TESTING ---")
33 | agent.FLAGS.test = True
34 | num_episodes = num_test_episodes
35 |
36 | # Reset successful episode counter
37 | successful_episodes = 0
38 |
39 | for episode in range(num_episodes):
40 |
41 | print("\nBatch %d, Episode %d" % (batch, episode))
42 |
43 | # Train for an episode
44 | success = agent.train(env, episode, total_episodes)
45 |
46 | if success:
47 | print("Batch %d, Episode %d End Goal Achieved\n" % (batch, episode))
48 |
49 | # Increment successful episode counter if applicable
50 | if mix_train_test and batch % TEST_FREQ == 0:
51 | successful_episodes += 1
52 |
53 | if FLAGS.train_only or (mix_train_test and batch % TEST_FREQ != 0):
54 | total_episodes += 1
55 |
56 | # Save agent
57 | agent.save_model(episode)
58 |
59 | # Finish evaluating policy if tested prior batch
60 | if mix_train_test and batch % TEST_FREQ == 0:
61 |
62 | # Log performance
63 | success_rate = successful_episodes / num_test_episodes * 100
64 | print("\nTesting Success Rate %.2f%%" % success_rate)
65 | agent.log_performance(success_rate)
66 | agent.FLAGS.test = False
67 |
68 | print("\n--- END TESTING ---\n")
69 |
--------------------------------------------------------------------------------
/ant_environments/ant_four_rooms_3_levels/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow.compat.v1 as tf
2 | tf.disable_v2_behavior()
3 |
4 | def layer(input_layer, num_next_neurons, is_output=False):
5 | num_prev_neurons = int(input_layer.shape[1])
6 | shape = [num_prev_neurons, num_next_neurons]
7 |
8 | if is_output:
9 | weight_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
10 | bias_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
11 | else:
12 | # 1/sqrt(f)
13 | fan_in_init = 1 / num_prev_neurons ** 0.5
14 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
15 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
16 |
17 | weights = tf.get_variable("weights", shape, initializer=weight_init)
18 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
19 |
20 | dot = tf.matmul(input_layer, weights) + biases
21 |
22 | if is_output:
23 | return dot
24 |
25 | relu = tf.nn.relu(dot)
26 | return relu
27 |
28 | def layer_goal_nn(input_layer, num_next_neurons, is_output=False):
29 | num_prev_neurons = int(input_layer.shape[1])
30 | shape = [num_prev_neurons, num_next_neurons]
31 |
32 |
33 | fan_in_init = 1 / num_prev_neurons ** 0.5
34 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
35 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
36 |
37 | weights = tf.get_variable("weights", shape, initializer=weight_init)
38 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
39 |
40 | dot = tf.matmul(input_layer, weights) + biases
41 |
42 | if is_output:
43 | return dot
44 |
45 | relu = tf.nn.relu(dot)
46 | return relu
47 |
48 |
49 | # Below function prints out options and environment specified by user
50 | def print_summary(FLAGS,env):
51 |
52 | print("\n- - - - - - - - - - -")
53 | print("Task Summary: ","\n")
54 | print("Environment: ", env.name)
55 | print("Number of Layers: ", FLAGS.layers)
56 | print("Time Limit per Layer: ", FLAGS.time_scale)
57 | print("Max Episode Time Steps: ", env.max_actions)
58 | print("Retrain: ", FLAGS.retrain)
59 | print("Test: ", FLAGS.test)
60 | print("Visualize: ", FLAGS.show)
61 | print("- - - - - - - - - - -", "\n\n")
62 |
63 |
64 | # Below function ensures environment configurations were properly entered
65 | def check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds, initial_state_space, subgoal_bounds, subgoal_thresholds, max_actions, timesteps_per_action):
66 |
67 | # Ensure model file is an ".xml" file
68 | assert model_name[-4:] == ".xml", "Mujoco model must be an \".xml\" file"
69 |
70 | # Ensure upper bound of each range is >= its lower bound
71 | if goal_space_train is not None:
72 | for i in range(len(goal_space_train)):
73 | assert goal_space_train[i][1] >= goal_space_train[i][0], "In the training goal space, upper bound must be >= lower bound"
74 |
75 | if goal_space_test is not None:
76 | for i in range(len(goal_space_test)):
77 | assert goal_space_test[i][1] >= goal_space_test[i][0], "In the testing goal space, upper bound must be >= lower bound"
78 |
79 | for i in range(len(initial_state_space)):
80 | assert initial_state_space[i][1] >= initial_state_space[i][0], "In initial state space, upper bound must be >= lower bound"
81 |
82 | for i in range(len(subgoal_bounds)):
83 | assert subgoal_bounds[i][1] >= subgoal_bounds[i][0], "In subgoal space, upper bound must be >= lower bound"
84 |
85 | # Make sure end goal spaces and thresholds have same first dimension
86 | if goal_space_train is not None and goal_space_test is not None:
87 | assert len(goal_space_train) == len(goal_space_test) == len(end_goal_thresholds), "End goal space and thresholds must have same first dimension"
88 |
89 | # Make sure subgoal spaces and thresholds have same dimensions
90 | assert len(subgoal_bounds) == len(subgoal_thresholds), "Subgoal space and thresholds must have same first dimension"
91 |
92 | # Ensure max_actions and timesteps_per_action are positive integers
93 | assert max_actions > 0, "Max actions should be a positive integer"
94 |
95 | assert timesteps_per_action > 0, "Timesteps per action should be a positive integer"
96 |
97 |
98 |
99 |
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/.DS_Store
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical Actor-Critic (HAC)
2 | This repository contains the code to implement the *Hierarchical Actor-Critic (HAC)* algorithm. HAC helps agents learn tasks more quickly by enabling them to break problems down into short sequences of actions.
3 |
4 | To run HAC, execute the command *"python3 initialize_HAC.py --retrain"*. By default, this will train a UR5 agent with a 3-level hierarchy to learn to achieve certain poses. This UR5 agent should achieve a 90+% success rate in around 350 episodes. The following [video](https://www.youtube.com/watch?v=R86Vs9Vb6Bc) shows how a 3-layered agent performed after 450 episodes of training. In order to watch your trained agent, execute the command *"python3 initialize_HAC.py --test --show"*. Please note that in order to run this repository, you must have (i) a MuJoCo [license](https://www.roboti.us/license.html), (ii) the required MuJoCo software [libraries](https://www.roboti.us/index.html), and (iii) the MuJoCo Python [wrapper](https://github.com/openai/mujoco-py) from OpenAI.
5 |
6 | To run HAC with your own agents and MuJoCo environments, you need to complete the template in the *"design_agent_and_env.py"* file. The *"example_designs"* folder contains other examples of design templates that build different agents in the UR5 reacher and inverted pendulum environments.
7 |
8 | Happy to answer any questions you have. Please email me at andrew_levy2@brown.edu.
9 |
10 | ## UPDATE LOG
11 |
12 | ### 10/12/2018 - Key Changes
13 | 1. Bounded Q-Values
14 |
15 | The Q-values output by the critic network at each level are now bounded between *[-T,0]*, in which *T* is the max sequence length in which each policy specializes and is also the negative of the subgoal penalty. We use an upper bound of 0 because our code uses a nonpositive reward function, so Q-values should never be positive. However, we noticed that sometimes the critic function approximator would make small mistakes and assign positive Q-values, which occasionally proved harmful to results. In addition, we observed improved results when we used a tighter lower bound of *-T* (i.e., the subgoal penalty). The improvement may come from the added flexibility the bounded Q-values give the critic: the critic can assign a value of *-T* to any (state, action, goal) tuple in which the action does not bring the agent close to the goal, instead of having to learn the exact value.
16 |
17 | 2. Removed Target Networks
18 |
19 | We also noticed improved results when we used the regular Q-networks to determine the Bellman target updates (i.e., *reward + Q(next state,pi(next state),goal)*) instead of the separate target networks that are used in DDPG. The default setting of our code base thus no longer uses target networks. However, the target networks can be easily activated by making the changes specified in (i) the *"learn"* method in the *"layer.py"* file and (ii) the *"update"* method in the *"critic.py"* file. (A small sketch of this target computation appears at the end of this update log.)
20 |
21 | 3. Centralized Design Template
22 |
23 | Users can now configure the agent and environment in the single file, *"design_agent_and_env.py"*. This template file contains most of the significant hyperparameters in HAC. We have removed the command-line options that can change the architecture of the agent's hierarchy.
24 |
25 | 4. Added UR5 Reacher Environment
26 |
27 | We have added a new UR5 reacher environment, in which a UR5 agent can learn to achieve various poses. The *"ur5.xml"* MuJoCo file also contains commented code for a Robotiq gripper if you would like to augment the agent. Additional environments will hopefully be added shortly.
28 |
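As a rough NumPy illustration of item 2 above (not the exact code in *"critic.py"*), the Bellman targets can be formed directly from the current actor and critic and clipped to the *[-T, 0]* range from item 1. The array values below are made up for the example.

```python
import numpy as np

def bellman_targets(rewards, next_q, is_terminals, T):
    """reward + Q(s', pi(s'), g), with no separate target network, no bootstrapping
    past terminal transitions, and targets clipped to the [-T, 0] bound."""
    targets = rewards + np.where(is_terminals, 0.0, next_q)
    return np.clip(targets, -T, 0.0)

# Toy batch: next_q is the *current* critic evaluated at the *current* actor's action.
rewards      = np.array([-1.0, -1.0,  0.0])
next_q       = np.array([-4.0, -9.5,  0.0])
is_terminals = np.array([False, False, True])
print(bellman_targets(rewards, next_q, is_terminals, T=10))   # [ -5. -10.   0.]
```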
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/__pycache__/actor.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/actor.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/__pycache__/agent.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/agent.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/__pycache__/critic.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/critic.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/__pycache__/design_agent_and_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/design_agent_and_env.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/__pycache__/environment.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/environment.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/__pycache__/experience_buffer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/experience_buffer.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/__pycache__/layer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/layer.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/__pycache__/options.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/options.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/__pycache__/run_HAC.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/run_HAC.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/actor.py:
--------------------------------------------------------------------------------
1 | import tensorflow.compat.v1 as tf
2 | tf.disable_v2_behavior()
3 | import numpy as np
4 | from utils import layer
5 |
6 |
7 | class Actor():
8 |
9 | def __init__(self,
10 | sess,
11 | env,
12 | batch_size,
13 | layer_number,
14 | FLAGS,
15 | learning_rate=0.001,
16 | tau=0.05):
17 |
18 | self.sess = sess
19 |
20 | # Determine range of actor network outputs. This will be used to configure outer layer of neural network
21 | if layer_number == 0:
22 | self.action_space_bounds = env.action_bounds
23 | self.action_offset = env.action_offset
24 | else:
25 | # Determine symmetric range of subgoal space and offset
26 | self.action_space_bounds = env.subgoal_bounds_symmetric
27 | self.action_offset = env.subgoal_bounds_offset
28 |
29 | # Dimensions of action will depend on layer level
30 | if layer_number == 0:
31 | self.action_space_size = env.action_dim
32 | else:
33 | self.action_space_size = env.subgoal_dim
34 |
35 | self.actor_name = 'actor_' + str(layer_number)
36 |
37 | # Dimensions of goal placeholder will differ depending on layer level
38 | if layer_number == FLAGS.layers - 1:
39 | self.goal_dim = env.end_goal_dim
40 | else:
41 | self.goal_dim = env.subgoal_dim
42 |
43 | self.state_dim = env.state_dim
44 |
45 | self.learning_rate = learning_rate
46 | # self.exploration_policies = exploration_policies
47 | self.tau = tau
48 | # self.batch_size = batch_size
49 | self.batch_size = tf.placeholder(tf.float32)
50 |
51 | self.state_ph = tf.placeholder(tf.float32, shape=(None, self.state_dim))
52 | self.goal_ph = tf.placeholder(tf.float32, shape=(None, self.goal_dim))
53 | self.features_ph = tf.concat([self.state_ph, self.goal_ph], axis=1)
54 |
55 | # Create actor network
56 | self.infer = self.create_nn(self.features_ph)
57 |
58 | # Target network code "repurposed" from Patrick Emani :^)
59 | self.weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name]
60 | # self.num_weights = len(self.weights)
61 |
62 | # Create target actor network
63 | self.target = self.create_nn(self.features_ph, name = self.actor_name + '_target')
64 | self.target_weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name][len(self.weights):]
65 |
66 | self.update_target_weights = \
67 | [self.target_weights[i].assign(tf.multiply(self.weights[i], self.tau) +
68 | tf.multiply(self.target_weights[i], 1. - self.tau))
69 | for i in range(len(self.target_weights))]
70 |
71 | self.action_derivs = tf.placeholder(tf.float32, shape=(None, self.action_space_size))
72 | self.unnormalized_actor_gradients = tf.gradients(self.infer, self.weights, -self.action_derivs)
73 | self.policy_gradient = list(map(lambda x: tf.div(x, self.batch_size), self.unnormalized_actor_gradients))
74 |
75 | # self.policy_gradient = tf.gradients(self.infer, self.weights, -self.action_derivs)
76 | self.train = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(self.policy_gradient, self.weights))
77 |
78 |
79 | def get_action(self, state, goal):
80 | actions = self.sess.run(self.infer,
81 | feed_dict={
82 | self.state_ph: state,
83 | self.goal_ph: goal
84 | })
85 |
86 | return actions
87 |
88 | def get_target_action(self, state, goal):
89 | actions = self.sess.run(self.target,
90 | feed_dict={
91 | self.state_ph: state,
92 | self.goal_ph: goal
93 | })
94 |
95 | return actions
96 |
97 | def update(self, state, goal, action_derivs, next_batch_size):
98 | weights, policy_grad, _ = self.sess.run([self.weights, self.policy_gradient, self.train],
99 | feed_dict={
100 | self.state_ph: state,
101 | self.goal_ph: goal,
102 | self.action_derivs: action_derivs,
103 | self.batch_size: next_batch_size
104 | })
105 |
106 | return len(weights)
107 |
108 | # self.sess.run(self.update_target_weights)
109 |
110 | # def create_nn(self, state, goal, name='actor'):
111 | def create_nn(self, features, name=None):
112 |
113 | if name is None:
114 | name = self.actor_name
115 |
116 | with tf.variable_scope(name + '_fc_1'):
117 | fc1 = layer(features, 64)
118 | with tf.variable_scope(name + '_fc_2'):
119 | fc2 = layer(fc1, 64)
120 | with tf.variable_scope(name + '_fc_3'):
121 | fc3 = layer(fc2, 64)
122 | with tf.variable_scope(name + '_fc_4'):
123 | fc4 = layer(fc3, self.action_space_size, is_output=True)
124 |
125 | output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset
126 |
127 | return output
128 |
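
The output layer above (line 125) squashes the final linear layer with `tanh`, then rescales by `action_space_bounds` and shifts by `action_offset`, so each action or subgoal lands inside the environment's configured box. A minimal numpy sketch of that mapping, with purely illustrative bound/offset values:

```python
import numpy as np

def scale_output(raw, bounds, offset):
    # tanh squashes each dimension to (-1, 1); multiplying by the symmetric
    # half-range and adding the offset yields values in [offset - bounds, offset + bounds].
    return np.tanh(raw) * bounds + offset

bounds = np.array([8.0, 8.0, 1.0])   # illustrative subgoal half-ranges
offset = np.array([0.0, 0.0, 1.0])   # illustrative subgoal center
print(scale_output(np.array([0.3, -2.0, 5.0]), bounds, offset))
# -> roughly [ 2.33 -7.71  2.00 ], always inside the configured box
```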
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/experience_buffer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class ExperienceBuffer():
4 |
5 | def __init__(self, max_buffer_size, batch_size):
6 | self.size = 0
7 | self.max_buffer_size = max_buffer_size
8 | self.experiences = []
9 | self.batch_size = batch_size
10 |
11 | def add(self, experience):
12 | assert len(experience) == 7, 'Experience must be of form (s, a, r, s\', g, t, grip_info)'
13 | assert type(experience[5]) == bool
14 |
15 | self.experiences.append(experience)
16 | self.size += 1
17 |
18 | # If the replay buffer is full, drop a fraction of the oldest transitions; removing only a single transition at a time slows down performance
19 | if self.size >= self.max_buffer_size:
20 | beg_index = int(np.floor(self.max_buffer_size/6))
21 | self.experiences = self.experiences[beg_index:]
22 | self.size -= beg_index
23 |
24 | def get_batch(self):
25 | states, actions, rewards, new_states, goals, is_terminals = [], [], [], [], [], []
26 | dist = np.random.randint(0, high=self.size, size=min(self.size, self.batch_size))
27 |
28 | for i in dist:
29 | states.append(self.experiences[i][0])
30 | actions.append(self.experiences[i][1])
31 | rewards.append(self.experiences[i][2])
32 | new_states.append(self.experiences[i][3])
33 | goals.append(self.experiences[i][4])
34 | is_terminals.append(self.experiences[i][5])
35 |
36 | return states, actions, rewards, new_states, goals, is_terminals
37 |
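
A hedged usage sketch of the buffer above; the transition values are placeholders and only need to satisfy the assertions in `add` (a 7-tuple whose sixth field is a bool):

```python
import numpy as np
from experience_buffer import ExperienceBuffer

buf = ExperienceBuffer(max_buffer_size=500, batch_size=4)

for _ in range(10):
    s, a, r = np.zeros(3), np.zeros(2), -1.0
    s_next, goal, done, grip_info = np.zeros(3), np.zeros(3), False, None
    buf.add((s, a, r, s_next, goal, done, grip_info))   # 7 fields, field 5 is a bool

# Sampling draws indices uniformly with replacement, up to batch_size transitions.
states, actions, rewards, new_states, goals, terminals = buf.get_batch()
print(len(states))   # 4
```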
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/initialize_HAC.py:
--------------------------------------------------------------------------------
1 | """
2 | This is the starting file for the Hierarchical Actor-Critic (HAC) algorithm. The below script processes the command-line options specified
3 | by the user and instantiates the environment and agent.
4 | """
5 |
6 | from design_agent_and_env import design_agent_and_env
7 | from options import parse_options
8 | from agent import Agent
9 | from run_HAC import run_HAC
10 |
11 | # Determine training options specified by user. The full list of available options can be found in the "options.py" file.
12 | FLAGS = parse_options()
13 |
14 | # Instantiate the agent and Mujoco environment. The designer must assign values to the hyperparameters listed in the "design_agent_and_env.py" file.
15 | agent, env = design_agent_and_env(FLAGS)
16 |
17 | # Begin training
18 | run_HAC(FLAGS,env,agent)
19 |
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.data-00000-of-00001
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.index
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.meta
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/models/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "/Users/andrewlevy/Documents/GitHub/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99"
2 | all_model_checkpoint_paths: "/Users/andrewlevy/Documents/GitHub/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99"
3 |
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/common/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The dm_control Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 |
16 | """Functions to manage the common assets for domains."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import os
23 | from dm_control.utils import resources
24 |
25 | _SUITE_DIR = os.path.dirname(os.path.dirname(__file__))
26 | _FILENAMES = [
27 | "common/materials.xml",
28 | "common/skybox.xml",
29 | "common/visual.xml",
30 | ]
31 |
32 | ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename))
33 | for filename in _FILENAMES}
34 |
35 |
36 | def read_model(model_filename):
37 | """Reads a model XML file and returns its contents as a string."""
38 | return resources.GetResource(os.path.join(_SUITE_DIR, model_filename))
39 |
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/common/materials.xml:
--------------------------------------------------------------------------------
[XML markup not preserved in this dump]
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/common/skybox.xml:
--------------------------------------------------------------------------------
[XML markup not preserved in this dump]
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/common/visual.xml:
--------------------------------------------------------------------------------
[XML markup not preserved in this dump]
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_base.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_forearm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_forearm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_shoulder.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_shoulder.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_upperarm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_upperarm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist1.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup_2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup_3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_finger_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_finger_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/new_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/new_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_finger_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_finger_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/red_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/red_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/upd_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/upd_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_base.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_forearm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_forearm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_shoulder.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_shoulder.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_upperarm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_upperarm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist1.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/mujoco_files/pendulum.xml:
--------------------------------------------------------------------------------
[XML markup not preserved in this dump]
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/options.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | """
4 | Below are training options user can specify in command line.
5 |
6 | Options Include:
7 |
8 | 1. Retrain boolean ("--retrain")
9 | - If included, actor and critic neural network parameters are reset
10 |
11 | 2. Testing boolean ("--test")
12 | - If included, agent only uses greedy policy without noise. No changes are made to policy and neural networks.
13 | - If not included, periods of training are by default interleaved with periods of testing to evaluate progress.
14 |
15 | 3. Show boolean ("--show")
16 | - If included, training will be visualized
17 |
18 | 4. Train Only boolean ("--train_only")
19 | - If included, agent will be solely in training mode and will not interleave periods of training and testing
20 |
21 | 5. Verbosity boolean ("--verbose")
22 | - If included, summary of each transition will be printed
23 |
24 | 6. All Trans boolean ("--all_trans")
25 | - If included, all transitions including (i) hindsight action, (ii) subgoal penalty, (iii) preliminary HER, and (iv) final HER transitions will be printed. Use the options below to print specific types of transitions.
26 |
27 | 7. Hindsight Action trans boolean ("--hind_action")
28 | - If included, prints hindsight action transitions for each level
29 |
30 | 8. Subgoal Penalty trans ("--penalty")
31 | - If included, prints the subgoal penalty transitions
32 |
33 | 9. Preliminary HER trans ("--prelim_HER")
34 | - If included, prints the preliminary HER transitions (i.e., with TBD reward and goal components)
35 |
36 | 10. HER trans ("--HER")
37 | - If included, prints the final HER transitions for each level
38 |
39 | 11. Show Q-values ("--Q_values")
40 | - Show Q-values for each action by each level
41 |
42 | """
43 |
44 | def parse_options():
45 | parser = argparse.ArgumentParser()
46 |
47 | parser.add_argument(
48 | '--retrain',
49 | action='store_true',
50 | help='Include to reset policy'
51 | )
52 |
53 | parser.add_argument(
54 | '--test',
55 | action='store_true',
56 | help='Include to fix current policy'
57 | )
58 |
59 | parser.add_argument(
60 | '--show',
61 | action='store_true',
62 | help='Include to visualize training'
63 | )
64 |
65 | parser.add_argument(
66 | '--train_only',
67 | action='store_true',
68 | help='Include to use training mode only'
69 | )
70 |
71 | parser.add_argument(
72 | '--verbose',
73 | action='store_true',
74 | help='Print summary of each transition'
75 | )
76 |
77 | parser.add_argument(
78 | '--all_trans',
79 | action='store_true',
80 | help='Print all transition types (hindsight action, subgoal penalty, preliminary HER, and final HER)'
81 | )
82 |
83 | parser.add_argument(
84 | '--hind_action',
85 | action='store_true',
86 | help='Print hindsight action transitions for each level'
87 | )
88 |
89 | parser.add_argument(
90 | '--penalty',
91 | action='store_true',
92 | help='Print subgoal penalty transitions'
93 | )
94 |
95 | parser.add_argument(
96 | '--prelim_HER',
97 | action='store_true',
98 | help='Print preliminary HER transitions'
99 | )
100 |
101 | parser.add_argument(
102 | '--HER',
103 | action='store_true',
104 | help='Print final HER transitions for each level'
105 | )
106 |
107 | parser.add_argument(
108 | '--Q_values',
109 | action='store_true',
110 | help='Show Q-values for each action by each level'
111 | )
112 |
113 | FLAGS, unparsed = parser.parse_known_args()
114 |
115 |
116 | return FLAGS
117 |
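
As a hedged sketch, the parser above reads `sys.argv`, so flags can be simulated by setting it before calling `parse_options()`; unrecognized flags are ignored because `parse_known_args` is used:

```python
import sys
sys.argv = ["initialize_HAC.py", "--retrain", "--show"]   # simulated command line

from options import parse_options
FLAGS = parse_options()
print(FLAGS.retrain, FLAGS.test, FLAGS.show)   # True False True
```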
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/run_HAC.py:
--------------------------------------------------------------------------------
1 | """
2 | "run_HAC.py" executes the training schedule for the agent. By default, the agent will alternate between exploration and testing phases. The number of episodes in the exploration phase can be configured in section 3 of the "design_agent_and_env.py" file. If the user prefers to only explore or only test, the user can enter the command-line option "--train_only" or "--test", respectively. The full list of command-line options is available in the "options.py" file.
3 | """
4 |
5 | import pickle as cpickle
6 | import agent as Agent
7 | from utils import print_summary
8 |
9 | NUM_BATCH = 1000
10 | TEST_FREQ = 2
11 |
12 | num_test_episodes = 100
13 |
14 | def run_HAC(FLAGS,env,agent):
15 |
16 | # Print task summary
17 | print_summary(FLAGS,env)
18 |
19 | # Determine training mode. If not testing and not solely training, interleave training and testing to track progress
20 | mix_train_test = False
21 | if not FLAGS.test and not FLAGS.train_only:
22 | mix_train_test = True
23 |
24 | # Track total training episodes completed
25 | total_episodes = 0
26 |
27 | for batch in range(NUM_BATCH):
28 |
29 | num_episodes = agent.other_params["num_exploration_episodes"]
30 |
31 | # Evaluate policy every TEST_FREQ batches if interleaving training and testing
32 | if mix_train_test and batch % TEST_FREQ == 0:
33 | print("\n--- TESTING ---")
34 | agent.FLAGS.test = True
35 | num_episodes = num_test_episodes
36 |
37 | # Reset successful episode counter
38 | successful_episodes = 0
39 |
40 | for episode in range(num_episodes):
41 |
42 | print("\nBatch %d, Episode %d" % (batch, episode))
43 |
44 | # Train for an episode
45 | success = agent.train(env, episode, total_episodes)
46 |
47 | if FLAGS.train_only or (mix_train_test and batch % TEST_FREQ != 0):
48 | total_episodes += 1
49 |
50 | if success:
51 | print("Batch %d, Episode %d End Goal Achieved\n" % (batch, episode))
52 |
53 | # Increment successful episode counter if applicable
54 | if mix_train_test and batch % TEST_FREQ == 0:
55 | successful_episodes += 1
56 |
57 | # Save agent
58 | agent.save_model(episode)
59 |
60 | # Finish evaluating policy if tested prior batch
61 | if mix_train_test and batch % TEST_FREQ == 0:
62 |
63 | # Log performance
64 | success_rate = successful_episodes / num_test_episodes * 100
65 | print("\nTesting Success Rate %.2f%%" % success_rate)
66 | agent.log_performance(success_rate)
67 | agent.FLAGS.test = False
68 |
69 | print("\n--- END TESTING ---\n")
70 |
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_2_levels/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow.compat.v1 as tf
2 | tf.disable_v2_behavior()
3 |
4 | def layer(input_layer, num_next_neurons, is_output=False):
5 | num_prev_neurons = int(input_layer.shape[1])
6 | shape = [num_prev_neurons, num_next_neurons]
7 |
8 | if is_output:
9 | weight_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
10 | bias_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
11 | else:
12 | # 1/sqrt(f)
13 | fan_in_init = 1 / num_prev_neurons ** 0.5
14 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
15 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
16 |
17 | weights = tf.get_variable("weights", shape, initializer=weight_init)
18 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
19 |
20 | dot = tf.matmul(input_layer, weights) + biases
21 |
22 | if is_output:
23 | return dot
24 |
25 | relu = tf.nn.relu(dot)
26 | return relu
27 |
28 | def layer_goal_nn(input_layer, num_next_neurons, is_output=False):
29 | num_prev_neurons = int(input_layer.shape[1])
30 | shape = [num_prev_neurons, num_next_neurons]
31 |
32 |
33 | fan_in_init = 1 / num_prev_neurons ** 0.5
34 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
35 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
36 |
37 | weights = tf.get_variable("weights", shape, initializer=weight_init)
38 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
39 |
40 | dot = tf.matmul(input_layer, weights) + biases
41 |
42 | if is_output:
43 | return dot
44 |
45 | relu = tf.nn.relu(dot)
46 | return relu
47 |
48 |
49 | # Below function prints out options and environment specified by user
50 | def print_summary(FLAGS,env):
51 |
52 | print("\n- - - - - - - - - - -")
53 | print("Task Summary: ","\n")
54 | print("Environment: ", env.name)
55 | print("Number of Layers: ", FLAGS.layers)
56 | print("Time Limit per Layer: ", FLAGS.time_scale)
57 | print("Max Episode Time Steps: ", env.max_actions)
58 | print("Retrain: ", FLAGS.retrain)
59 | print("Test: ", FLAGS.test)
60 | print("Visualize: ", FLAGS.show)
61 | print("- - - - - - - - - - -", "\n\n")
62 |
63 |
64 | # Below function ensures environment configurations were properly entered
65 | def check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds, initial_state_space, subgoal_bounds, subgoal_thresholds, max_actions, timesteps_per_action):
66 |
67 | # Ensure model file is an ".xml" file
68 | assert model_name[-4:] == ".xml", "Mujoco model must be an \".xml\" file"
69 |
70 | # Ensure upper bounds of range is >= lower bound of range
71 | if goal_space_train is not None:
72 | for i in range(len(goal_space_train)):
73 | assert goal_space_train[i][1] >= goal_space_train[i][0], "In the training goal space, upper bound must be >= lower bound"
74 |
75 | if goal_space_test is not None:
76 | for i in range(len(goal_space_test)):
77 | assert goal_space_test[i][1] >= goal_space_test[i][0], "In the test goal space, upper bound must be >= lower bound"
78 |
79 | for i in range(len(initial_state_space)):
80 | assert initial_state_space[i][1] >= initial_state_space[i][0], "In initial state space, upper bound must be >= lower bound"
81 |
82 | for i in range(len(subgoal_bounds)):
83 | assert subgoal_bounds[i][1] >= subgoal_bounds[i][0], "In subgoal space, upper bound must be >= lower bound"
84 |
85 | # Make sure end goal spaces and thresholds have same first dimension
86 | if goal_space_train is not None and goal_space_test is not None:
87 | assert len(goal_space_train) == len(goal_space_test) == len(end_goal_thresholds), "End goal space and thresholds must have same first dimension"
88 |
89 | # Make sure subgoal spaces and thresholds have same first dimension
90 | assert len(subgoal_bounds) == len(subgoal_thresholds), "Subgoal space and thresholds must have same first dimension"
91 |
92 | # Ensure max_actions and timesteps_per_action are positive integers
93 | assert max_actions > 0, "Max actions should be a positive integer"
94 |
95 | assert timesteps_per_action > 0, "Timesteps per action should be a positive integer"
96 |
97 |
98 |
99 |
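
The hidden layers above use the common fan-in rule noted in the `# 1/sqrt(f)` comment: weights and biases are drawn from U(-1/sqrt(f), 1/sqrt(f)), where f is the number of inputs to the layer. A small numpy illustration of the bound (layer sizes are just examples):

```python
import numpy as np

def fan_in_bound(num_prev_neurons):
    # Uniform init range used for hidden layers: U(-1/sqrt(f), 1/sqrt(f))
    return 1.0 / np.sqrt(num_prev_neurons)

print(fan_in_bound(64))    # 0.125  -> weights drawn from U(-0.125, 0.125)
print(fan_in_bound(256))   # 0.0625
```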
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical Actor-Critic (HAC)
2 | This repository contains the code to implement the *Hierarchical Actor-Critic (HAC)* algorithm. HAC helps agents learn tasks more quickly by enabling them to break problems down into short sequences of actions.
3 |
4 | To run HAC, execute the command *"python3 initialize_HAC.py --retrain"*. By default, this will train a UR5 agent with a 3-level hierarchy to learn to achieve certain poses. This UR5 agent should achieve a 90+% success rate in around 350 episodes. The following [video](https://www.youtube.com/watch?v=R86Vs9Vb6Bc) shows how a 3-layered agent performed after 450 episodes of training. In order to watch your trained agent, execute the command *"python3 initialize_HAC.py --test --show"*. Please note that in order to run this repository, you must have (i) a MuJoCo [license](https://www.roboti.us/license.html), (ii) the required MuJoCo software [libraries](https://www.roboti.us/index.html), and (iii) the MuJoCo Python [wrapper](https://github.com/openai/mujoco-py) from OpenAI.
5 |
6 | To run HAC with your own agents and MuJoCo environments, you need to complete the template in the *"design_agent_and_env.py"* file. The *"example_designs"* folder contains other examples of design templates that build different agents in the UR5 reacher and inverted pendulum environments.
7 |
8 | Happy to answer any questions you have. Please email me at andrew_levy2@brown.edu.
9 |
10 | ## UPDATE LOG
11 |
12 | ### 10/12/2018 - Key Changes
13 | 1. Bounded Q-Values
14 |
15 | The Q-values output by the critic network at each level are now bounded between *[-T,0]*, in which *T* is the maximum sequence length in which each policy specializes and also the magnitude of the subgoal penalty. We use an upper bound of 0 because our code uses a nonpositive reward function, so Q-values should never be positive. However, we noticed that sometimes the critic function approximator would make small mistakes and assign positive Q-values, which occasionally proved harmful to results. In addition, we observed improved results when we used a tighter lower bound of *-T* (i.e., the subgoal penalty). The improvement may come from the extra flexibility the bounded Q-values provide the critic: it can assign a value of *-T* to any (state, action, goal) tuple in which the action does not bring the agent close to the goal, instead of having to learn the exact value.
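
As a rough illustration only (not the exact code path in *"critic.py"*), the bound amounts to clipping critic values to the interval *[-T, 0]*; the value of *T* below is illustrative:

```python
import numpy as np

T = 10   # illustrative max sequence length per level (also the subgoal penalty magnitude)

def bound_q(q_values, time_scale=T):
    # Rewards are nonpositive, so Q-values can never exceed 0; -T is the
    # tighter lower bound discussed above.
    return np.clip(q_values, -time_scale, 0.0)

print(bound_q(np.array([0.7, -3.2, -14.0])))   # [  0.   -3.2 -10. ]
```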
16 |
17 | 2. Removed Target Networks
18 |
19 | We also noticed improved results when we used the regular Q-networks to determine the Bellman target updates (i.e., *reward + Q(next state,pi(next state),goal)*) instead of the separate target networks that are used in DDPG. The default setting of our code base thus no longer uses target networks. However, the target networks can be easily activated by making the changes specified in (i) the *"learn"* method in the *"layer.py"* file and (ii) the *"update"* method in the *"critic.py"* file.
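
A minimal sketch of the resulting target computation, assuming a standard discount factor and terminal masking (the illustrative constants here are not taken from *"critic.py"* or *"layer.py"*):

```python
import numpy as np

def bellman_targets(rewards, next_q, is_terminal, gamma=0.98, time_scale=10):
    # next_q comes from the *regular* critic evaluated at pi(next state), not a
    # separate target network; targets are then kept inside the [-T, 0] bound.
    targets = rewards + gamma * next_q * (1.0 - is_terminal)
    return np.clip(targets, -time_scale, 0.0)

print(bellman_targets(np.array([-1.0, 0.0]),
                      np.array([-4.0, -2.0]),
                      np.array([0.0, 1.0])))   # -> [-4.92  0.  ]
```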
20 |
21 | 3. Centralized Design Template
22 |
23 | Users can now configure the agent and environment in the single file, *"design_agent_and_env.py"*. This template file contains most of the significant hyperparameters in HAC. We have removed the command-line options that can change the architecture of the agent's hierarchy.
24 |
25 | 4. Added UR5 Reacher Environment
26 |
27 | We have added a new UR5 reacher environment, in which a UR5 agent can learn to achieve various poses. The *"ur5.xml"* MuJoCo file also contains commented code for a Robotiq gripper if you would like to augment the agent. Additional environments will hopefully be added shortly.
28 |
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/__pycache__/actor.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/actor.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/__pycache__/agent.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/agent.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/__pycache__/critic.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/critic.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/__pycache__/design_agent_and_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/design_agent_and_env.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/__pycache__/environment.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/environment.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/__pycache__/experience_buffer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/experience_buffer.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/__pycache__/layer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/layer.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/__pycache__/options.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/options.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/__pycache__/run_HAC.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/run_HAC.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/experience_buffer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class ExperienceBuffer():
4 |
5 | def __init__(self, max_buffer_size, batch_size):
6 | self.size = 0
7 | self.max_buffer_size = max_buffer_size
8 | self.experiences = []
9 | self.batch_size = batch_size
10 |
11 | def add(self, experience):
12 | assert len(experience) == 7, 'Experience must be of form (s, a, r, s\', g, t, grip_info)'
13 | assert type(experience[5]) == bool
14 |
15 | self.experiences.append(experience)
16 | self.size += 1
17 |
18 | # If the replay buffer is full, drop a fraction of the oldest transitions; removing only a single transition at a time slows down performance
19 | if self.size >= self.max_buffer_size:
20 | beg_index = int(np.floor(self.max_buffer_size/6))
21 | self.experiences = self.experiences[beg_index:]
22 | self.size -= beg_index
23 |
24 | def get_batch(self):
25 | states, actions, rewards, new_states, goals, is_terminals = [], [], [], [], [], []
26 | dist = np.random.randint(0, high=self.size, size=min(self.size, self.batch_size))
27 |
28 | for i in dist:
29 | states.append(self.experiences[i][0])
30 | actions.append(self.experiences[i][1])
31 | rewards.append(self.experiences[i][2])
32 | new_states.append(self.experiences[i][3])
33 | goals.append(self.experiences[i][4])
34 | is_terminals.append(self.experiences[i][5])
35 |
36 | return states, actions, rewards, new_states, goals, is_terminals
37 |
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/initialize_HAC.py:
--------------------------------------------------------------------------------
1 | """
2 | This is the starting file for the Hierarchical Actor-Critic (HAC) algorithm. The below script processes the command-line options specified
3 | by the user and instantiates the environment and agent.
4 | """
5 |
6 | from design_agent_and_env import design_agent_and_env
7 | from options import parse_options
8 | from agent import Agent
9 | from run_HAC import run_HAC
10 |
11 | # Determine training options specified by user. The full list of available options can be found in the "options.py" file.
12 | FLAGS = parse_options()
13 |
14 | # Instantiate the agent and Mujoco environment. The designer must assign values to the hyperparameters listed in the "design_agent_and_env.py" file.
15 | agent, env = design_agent_and_env(FLAGS)
16 |
17 | # Begin training
18 | run_HAC(FLAGS,env,agent)
19 |
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.data-00000-of-00001
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.index
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.meta
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/models/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "/Users/andrewlevy/Documents/GitHub/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99"
2 | all_model_checkpoint_paths: "/Users/andrewlevy/Documents/GitHub/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99"
3 |
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/common/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The dm_control Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 |
16 | """Functions to manage the common assets for domains."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import os
23 | from dm_control.utils import resources
24 |
25 | _SUITE_DIR = os.path.dirname(os.path.dirname(__file__))
26 | _FILENAMES = [
27 | "common/materials.xml",
28 | "common/skybox.xml",
29 | "common/visual.xml",
30 | ]
31 |
32 | ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename))
33 | for filename in _FILENAMES}
34 |
35 |
36 | def read_model(model_filename):
37 | """Reads a model XML file and returns its contents as a string."""
38 | return resources.GetResource(os.path.join(_SUITE_DIR, model_filename))
39 |
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/common/materials.xml:
--------------------------------------------------------------------------------
[XML markup not preserved in this dump]
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/common/skybox.xml:
--------------------------------------------------------------------------------
[XML markup not preserved in this dump]
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/common/visual.xml:
--------------------------------------------------------------------------------
[XML markup not preserved in this dump]
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_base.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_forearm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_forearm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_shoulder.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_shoulder.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_upperarm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_upperarm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist1.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup_2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup_3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_finger_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_finger_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/new_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/new_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_finger_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_finger_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/red_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/red_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/upd_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/upd_solo_cup.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_base.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_forearm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_forearm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_shoulder.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_shoulder.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_upperarm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_upperarm.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist1.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist2.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist3.stl
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/mujoco_files/pendulum.xml:
--------------------------------------------------------------------------------
[86-line MuJoCo XML model; the markup was not captured in this export]
--------------------------------------------------------------------------------
/ant_environments/ant_reacher_3_levels/options.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | """
4 | Below are the training options the user can specify on the command line.
5 |
6 | Options Include:
7 |
8 | 1. Retrain boolean ("--retrain")
9 | - If included, actor and critic neural network parameters are reset
10 |
11 | 2. Testing boolean ("--test")
12 | - If included, agent only uses greedy policy without noise. No changes are made to the policy or neural networks.
13 | - If not included, periods of training are by default interleaved with periods of testing to evaluate progress.
14 |
15 | 3. Show boolean ("--show")
16 | - If included, training will be visualized
17 |
18 | 4. Train Only boolean ("--train_only")
19 | - If included, agent will be solely in training mode and will not interleave periods of training and testing
20 |
21 | 5. Verbosity boolean ("--verbose")
22 | - If included, summary of each transition will be printed
23 |
24 | 6. All Trans boolean ("--all_trans")
25 | - If included, all transitions including (i) hindsight action, (ii) subgoal penalty, (iii) preliminary HER, and (iv) final HER transitions will be printed. Use the options below to print specific transition types.
26 |
27 | 7. Hindsight Action trans boolean ("--hind_action")
28 | - If included, prints hindsight action transitions for each level
29 |
30 | 8. Subgoal Penalty trans ("--penalty")
31 | - If included, prints the subgoal penalty transitions
32 |
33 | 9. Preliminary HER trans ("--prelim_HER")
34 | - If included, prints the preliminary HER transitions (i.e., with TBD reward and goal components)
35 |
36 | 10. HER trans ("--HER")
37 | - If included, prints the final HER transitions for each level
38 |
39 | 11. Show Q-values ("--Q_values")
40 | - Show Q-values for each action by each level
41 |
42 | """
43 |
44 | def parse_options():
45 | parser = argparse.ArgumentParser()
46 |
47 | parser.add_argument(
48 | '--retrain',
49 | action='store_true',
50 | help='Include to reset policy'
51 | )
52 |
53 | parser.add_argument(
54 | '--test',
55 | action='store_true',
56 | help='Include to fix current policy'
57 | )
58 |
59 | parser.add_argument(
60 | '--show',
61 | action='store_true',
62 | help='Include to visualize training'
63 | )
64 |
65 | parser.add_argument(
66 | '--train_only',
67 | action='store_true',
68 | help='Include to use training mode only'
69 | )
70 |
71 | parser.add_argument(
72 | '--verbose',
73 | action='store_true',
74 | help='Print summary of each transition'
75 | )
76 |
77 | parser.add_argument(
78 | '--all_trans',
79 | action='store_true',
80 | help='Print all types of transitions'
81 | )
82 |
83 | parser.add_argument(
84 | '--hind_action',
85 | action='store_true',
86 | help='Print hindsight action transitions'
87 | )
88 |
89 | parser.add_argument(
90 | '--penalty',
91 | action='store_true',
92 | help='Print subgoal penalty transitions'
93 | )
94 |
95 | parser.add_argument(
96 | '--prelim_HER',
97 | action='store_true',
98 | help='Print preliminary HER transitions'
99 | )
100 |
101 | parser.add_argument(
102 | '--HER',
103 | action='store_true',
104 | help='Print final HER transitions'
105 | )
106 |
107 | parser.add_argument(
108 | '--Q_values',
109 | action='store_true',
110 | help='Print Q-values for each action at each level'
111 | )
112 |
113 | FLAGS, unparsed = parser.parse_known_args()
114 |
115 |
116 | return FLAGS
117 |
--------------------------------------------------------------------------------
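Note that the parser above uses parse_known_args, so unrecognized command-line arguments are silently ignored rather than raising an error. For reference, a minimal usage sketch (the invocation and print statements below are illustrative assumptions; only the flag names come from the parser itself):

# Assumed example invocation: python initialize_HAC.py --retrain --show --Q_values
from options import parse_options

FLAGS = parse_options()            # unknown flags are ignored (parse_known_args)
if FLAGS.retrain:
    print("Actor and critic networks will be reset")
if FLAGS.Q_values:
    print("Q-values will be printed for each level")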
/ant_environments/ant_reacher_3_levels/run_HAC.py:
--------------------------------------------------------------------------------
1 | """
2 | "run_HAC.py" executes the training schedule for the agent. By default, the agent will alternate between exploration and testing phases. The number of episodes in the exploration phase can be configured in section 3 of "design_agent_and_env.py" file. If the user prefers to only explore or only test, the user can enter the command-line options ""--train_only" or "--test", respectively. The full list of command-line options is available in the "options.py" file.
3 | """
4 |
5 | import pickle as cpickle
6 | import agent as Agent
7 | from utils import print_summary
8 |
9 | NUM_BATCH = 1000
10 | TEST_FREQ = 2
11 |
12 | num_test_episodes = 100
13 |
14 | def run_HAC(FLAGS,env,agent):
15 |
16 | # Print task summary
17 | print_summary(FLAGS,env)
18 |
19 | # Determine training mode. If not testing and not solely training, interleave training and testing to track progress
20 | mix_train_test = False
21 | if not FLAGS.test and not FLAGS.train_only:
22 | mix_train_test = True
23 |
24 | # Track total training episodes completed
25 | total_episodes = 0
26 |
27 | for batch in range(NUM_BATCH):
28 |
29 | num_episodes = agent.other_params["num_exploration_episodes"]
30 |
31 | # Evaluate policy every TEST_FREQ batches if interleaving training and testing
32 | if mix_train_test and batch % TEST_FREQ == 0:
33 | print("\n--- TESTING ---")
34 | agent.FLAGS.test = True
35 | num_episodes = num_test_episodes
36 |
37 | # Reset successful episode counter
38 | successful_episodes = 0
39 |
40 | for episode in range(num_episodes):
41 |
42 | print("\nBatch %d, Episode %d" % (batch, episode))
43 |
44 | # Train for an episode
45 | success = agent.train(env, episode, total_episodes)
46 |
47 | if FLAGS.train_only or (mix_train_test and batch % TEST_FREQ != 0):
48 | total_episodes += 1
49 |
50 | if success:
51 | print("Batch %d, Episode %d End Goal Achieved\n" % (batch, episode))
52 |
53 | # Increment successful episode counter if applicable
54 | if mix_train_test and batch % TEST_FREQ == 0:
55 | successful_episodes += 1
56 |
57 | # Save agent
58 | agent.save_model(episode)
59 |
60 | # Finish evaluating policy if this batch was a test batch
61 | if mix_train_test and batch % TEST_FREQ == 0:
62 |
63 | # Log performance
64 | success_rate = successful_episodes / num_test_episodes * 100
65 | print("\nTesting Success Rate %.2f%%" % success_rate)
66 | agent.log_performance(success_rate)
67 | agent.FLAGS.test = False
68 |
69 | print("\n--- END TESTING ---\n")
70 |
--------------------------------------------------------------------------------
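For intuition about the schedule above: with TEST_FREQ = 2, every even-numbered batch is an evaluation batch of num_test_episodes greedy episodes, every odd-numbered batch is an exploration batch, and the logged success rate is successful_episodes / num_test_episodes * 100. A small sketch of that alternation (constants copied from the script; the loop is shortened for illustration):

TEST_FREQ = 2
num_test_episodes = 100

for batch in range(6):
    if batch % TEST_FREQ == 0:
        print("batch %d: evaluate greedy policy for %d episodes" % (batch, num_test_episodes))
    else:
        print("batch %d: explore with noisy policy" % batch)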
/ant_environments/ant_reacher_3_levels/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow.compat.v1 as tf
2 | tf.disable_v2_behavior()
3 |
4 | def layer(input_layer, num_next_neurons, is_output=False):
5 | num_prev_neurons = int(input_layer.shape[1])
6 | shape = [num_prev_neurons, num_next_neurons]
7 |
8 | if is_output:
9 | weight_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
10 | bias_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
11 | else:
12 | # 1/sqrt(f)
13 | fan_in_init = 1 / num_prev_neurons ** 0.5
14 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
15 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
16 |
17 | weights = tf.get_variable("weights", shape, initializer=weight_init)
18 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
19 |
20 | dot = tf.matmul(input_layer, weights) + biases
21 |
22 | if is_output:
23 | return dot
24 |
25 | relu = tf.nn.relu(dot)
26 | return relu
27 |
28 | def layer_goal_nn(input_layer, num_next_neurons, is_output=False):
29 | num_prev_neurons = int(input_layer.shape[1])
30 | shape = [num_prev_neurons, num_next_neurons]
31 |
32 |
33 | fan_in_init = 1 / num_prev_neurons ** 0.5
34 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
35 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
36 |
37 | weights = tf.get_variable("weights", shape, initializer=weight_init)
38 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
39 |
40 | dot = tf.matmul(input_layer, weights) + biases
41 |
42 | if is_output:
43 | return dot
44 |
45 | relu = tf.nn.relu(dot)
46 | return relu
47 |
48 |
49 | # Below function prints out options and environment specified by user
50 | def print_summary(FLAGS,env):
51 |
52 | print("\n- - - - - - - - - - -")
53 | print("Task Summary: ","\n")
54 | print("Environment: ", env.name)
55 | print("Number of Layers: ", FLAGS.layers)
56 | print("Time Limit per Layer: ", FLAGS.time_scale)
57 | print("Max Episode Time Steps: ", env.max_actions)
58 | print("Retrain: ", FLAGS.retrain)
59 | print("Test: ", FLAGS.test)
60 | print("Visualize: ", FLAGS.show)
61 | print("- - - - - - - - - - -", "\n\n")
62 |
63 |
64 | # Below function ensures environment configurations were properly entered
65 | def check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds, initial_state_space, subgoal_bounds, subgoal_thresholds, max_actions, timesteps_per_action):
66 |
67 | # Ensure model file is an ".xml" file
68 | assert model_name[-4:] == ".xml", "Mujoco model must be an \".xml\" file"
69 |
70 | # Ensure upper bound of each range is >= its lower bound
71 | if goal_space_train is not None:
72 | for i in range(len(goal_space_train)):
73 | assert goal_space_train[i][1] >= goal_space_train[i][0], "In the training goal space, upper bound must be >= lower bound"
74 |
75 | if goal_space_test is not None:
76 | for i in range(len(goal_space_test)):
77 | assert goal_space_test[i][1] >= goal_space_test[i][0], "In the testing goal space, upper bound must be >= lower bound"
78 |
79 | for i in range(len(initial_state_space)):
80 | assert initial_state_space[i][1] >= initial_state_space[i][0], "In initial state space, upper bound must be >= lower bound"
81 |
82 | for i in range(len(subgoal_bounds)):
83 | assert subgoal_bounds[i][1] >= subgoal_bounds[i][0], "In subgoal space, upper bound must be >= lower bound"
84 |
85 | # Make sure end goal spaces and thresholds have same first dimension
86 | if goal_space_train is not None and goal_space_test is not None:
87 | assert len(goal_space_train) == len(goal_space_test) == len(end_goal_thresholds), "End goal space and thresholds must have same first dimension"
88 |
89 | # Make sure subgoal spaces and thresholds have same dimensions
90 | assert len(subgoal_bounds) == len(subgoal_thresholds), "Subgoal space and thresholds must have same first dimension"
91 |
92 | # Ensure max_actions and timesteps_per_action are positive integers
93 | assert max_actions > 0, "Max actions should be a positive integer"
94 |
95 | assert timesteps_per_action > 0, "Timesteps per action should be a positive integer"
96 |
97 |
98 |
99 |
--------------------------------------------------------------------------------
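Because layer() and layer_goal_nn() create variables named "weights" and "biases" via tf.get_variable, each call must be wrapped in its own variable scope. A hedged sketch of stacking the helper into a small network (the scope names and layer sizes are assumptions for illustration, not taken from the repository):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from utils import layer

state_ph = tf.placeholder(tf.float32, shape=[None, 10], name="state")
with tf.variable_scope("fc_1"):
    h1 = layer(state_ph, 64)            # hidden layer: 1/sqrt(fan_in) init + ReLU
with tf.variable_scope("fc_2"):
    h2 = layer(h1, 64)
with tf.variable_scope("fc_out"):
    q = layer(h2, 1, is_output=True)    # output layer: small uniform init, no ReLU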
/experience_buffer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class ExperienceBuffer():
4 |
5 | def __init__(self, max_buffer_size, batch_size):
6 | self.size = 0
7 | self.max_buffer_size = max_buffer_size
8 | self.experiences = []
9 | self.batch_size = batch_size
10 |
11 | def add(self, experience):
12 | assert len(experience) == 7, 'Experience must be of form (s, a, r, s\', g, t, grip_info)'
13 | assert type(experience[5]) == bool
14 |
15 | self.experiences.append(experience)
16 | self.size += 1
17 |
18 | # If the replay buffer is full, remove a fraction of it; removing only a single transition at a time slows performance
19 | if self.size >= self.max_buffer_size:
20 | beg_index = int(np.floor(self.max_buffer_size/6))
21 | self.experiences = self.experiences[beg_index:]
22 | self.size -= beg_index
23 |
24 | def get_batch(self):
25 | states, actions, rewards, new_states, goals, is_terminals = [], [], [], [], [], []
26 | dist = np.random.randint(0, high=self.size, size=self.batch_size)
27 |
28 | for i in dist:
29 | states.append(self.experiences[i][0])
30 | actions.append(self.experiences[i][1])
31 | rewards.append(self.experiences[i][2])
32 | new_states.append(self.experiences[i][3])
33 | goals.append(self.experiences[i][4])
34 | is_terminals.append(self.experiences[i][5])
35 |
36 | return states, actions, rewards, new_states, goals, is_terminals
37 |
--------------------------------------------------------------------------------
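A hedged usage sketch of the buffer API above. The 7-tuple layout follows the assert in add(), i.e. (state, action, reward, next state, goal, terminal flag, gripper info), with dummy arrays standing in for real transitions:

import numpy as np
from experience_buffer import ExperienceBuffer

buffer = ExperienceBuffer(max_buffer_size=10000, batch_size=4)

for _ in range(8):
    s, a, g = np.zeros(3), np.zeros(2), np.ones(3)
    s_next = np.ones(3)
    buffer.add((s, a, -1.0, s_next, g, False, None))    # (s, a, r, s', g, terminal, grip_info)

states, actions, rewards, new_states, goals, terminals = buffer.get_batch()
print(len(states))                                      # == batch_size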
/initialize_HAC.py:
--------------------------------------------------------------------------------
1 | """
2 | This is the starting file for the Hierarchical Actor-Critic (HAC) algorithm. The script below processes the command-line options specified
3 | by the user and instantiates the environment and agent.
4 | """
5 |
6 | from design_agent_and_env import design_agent_and_env
7 | from options import parse_options
8 | from agent import Agent
9 | from run_HAC import run_HAC
10 |
11 | # Determine training options specified by user. The full list of available options can be found in "options.py" file.
12 | FLAGS = parse_options()
13 |
14 | # Instantiate the agent and Mujoco environment. The designer must assign values to the hyperparameters listed in the "design_agent_and_env.py" file.
15 | agent, env = design_agent_and_env(FLAGS)
16 |
17 | # Begin training
18 | run_HAC(FLAGS,env,agent)
19 |
--------------------------------------------------------------------------------
/mujoco_files/common/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The dm_control Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 |
16 | """Functions to manage the common assets for domains."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import os
23 | from dm_control.utils import resources
24 |
25 | _SUITE_DIR = os.path.dirname(os.path.dirname(__file__))
26 | _FILENAMES = [
27 | "common/materials.xml",
28 | "common/skybox.xml",
29 | "common/visual.xml",
30 | ]
31 |
32 | ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename))
33 | for filename in _FILENAMES}
34 |
35 |
36 | def read_model(model_filename):
37 | """Reads a model XML file and returns its contents as a string."""
38 | return resources.GetResource(os.path.join(_SUITE_DIR, model_filename))
39 |
--------------------------------------------------------------------------------
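A hedged sketch of how these dm_control-style helpers are typically consumed. The import path and the model filename are assumptions based on the directory layout of this repository, and dm_control must be installed for resources.GetResource to work:

from mujoco_files.common import ASSETS, read_model

print(sorted(ASSETS))                        # the three shared asset files listed above
xml_string = read_model("ant_reacher.xml")   # resolved relative to the mujoco_files/ directory
print(xml_string[:60])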
/mujoco_files/common/materials.xml:
--------------------------------------------------------------------------------
[dm_control materials.xml asset (23 lines); markup not captured in this export]
--------------------------------------------------------------------------------
/mujoco_files/common/skybox.xml:
--------------------------------------------------------------------------------
[dm_control skybox.xml asset (7 lines); markup not captured in this export]
--------------------------------------------------------------------------------
/mujoco_files/common/visual.xml:
--------------------------------------------------------------------------------
[dm_control visual.xml asset (8 lines); markup not captured in this export]
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/c_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_base.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/c_forearm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_forearm.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/c_shoulder.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_shoulder.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/c_upperarm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_upperarm.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/c_wrist1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_wrist1.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/c_wrist2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_wrist2.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/c_wrist3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_wrist3.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/glass_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/glass_cup.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/glass_cup_2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/glass_cup_2.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/glass_cup_3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/glass_cup_3.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/inner_finger_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/inner_finger_coarse.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/inner_finger_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/inner_finger_fine.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/inner_knuckle_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/inner_knuckle_coarse.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/inner_knuckle_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/inner_knuckle_fine.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/new_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/new_solo_cup.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/outer_finger_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/outer_finger_coarse.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/outer_finger_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/outer_finger_fine.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/outer_knuckle_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/outer_knuckle_coarse.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/outer_knuckle_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/outer_knuckle_fine.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/red_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/red_solo_cup.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/smaller_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/smaller_solo_cup.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/solo_cup.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/upd_solo_cup.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/upd_solo_cup.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/v_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_base.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/v_forearm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_forearm.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/v_shoulder.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_shoulder.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/v_upperarm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_upperarm.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/v_wrist1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_wrist1.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/v_wrist2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_wrist2.stl
--------------------------------------------------------------------------------
/mujoco_files/muj_gripper/v_wrist3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_wrist3.stl
--------------------------------------------------------------------------------
/mujoco_files/pendulum.xml:
--------------------------------------------------------------------------------
[86-line MuJoCo XML model; the markup was not captured in this export]
--------------------------------------------------------------------------------
/options.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | """
4 | Below are the training options the user can specify on the command line.
5 |
6 | Options Include:
7 |
8 | 1. Retrain boolean
9 | - If included, actor and critic neural network parameters are reset
10 |
11 | 2. Testing boolean
12 | - If included, agent only uses greedy policy without noise. No changes are made to the policy or neural networks.
13 | - If not included, periods of training are by default interleaved with periods of testing to evaluate progress.
14 |
15 | 3. Show boolean
16 | - If included, training will be visualized
17 |
18 | 4. Train Only boolean
19 | - If included, agent will be solely in training mode and will not interleave periods of training and testing
20 |
21 | 5. Verbosity boolean
22 | - If included, summary of each transition will be printed
23 | """
24 |
25 | def parse_options():
26 | parser = argparse.ArgumentParser()
27 |
28 | parser.add_argument(
29 | '--retrain',
30 | action='store_true',
31 | help='Include to reset policy'
32 | )
33 |
34 | parser.add_argument(
35 | '--test',
36 | action='store_true',
37 | help='Include to fix current policy'
38 | )
39 |
40 | parser.add_argument(
41 | '--show',
42 | action='store_true',
43 | help='Include to visualize training'
44 | )
45 |
46 | parser.add_argument(
47 | '--train_only',
48 | action='store_true',
49 | help='Include to use training mode only'
50 | )
51 |
52 | parser.add_argument(
53 | '--verbose',
54 | action='store_true',
55 | help='Print summary of each transition'
56 | )
57 |
58 | FLAGS, unparsed = parser.parse_known_args()
59 |
60 |
61 | return FLAGS
62 |
--------------------------------------------------------------------------------
/run_HAC.py:
--------------------------------------------------------------------------------
1 | """
2 | "run_HAC.py" executes the training schedule for the agent. By default, the agent will alternate between exploration and testing phases. The number of episodes in the exploration phase can be configured in section 3 of "design_agent_and_env.py" file. If the user prefers to only explore or only test, the user can enter the command-line options ""--train_only" or "--test", respectively. The full list of command-line options is available in the "options.py" file.
3 | """
4 |
5 | import pickle as cpickle
6 | import agent as Agent
7 | from utils import print_summary
8 |
9 | NUM_BATCH = 1000
10 | TEST_FREQ = 2
11 |
12 | num_test_episodes = 100
13 |
14 | def run_HAC(FLAGS,env,agent):
15 |
16 | # Print task summary
17 | print_summary(FLAGS,env)
18 |
19 | # Determine training mode. If not testing and not solely training, interleave training and testing to track progress
20 | mix_train_test = False
21 | if not FLAGS.test and not FLAGS.train_only:
22 | mix_train_test = True
23 |
24 | for batch in range(NUM_BATCH):
25 |
26 | num_episodes = agent.other_params["num_exploration_episodes"]
27 |
28 | # Evaluate policy every TEST_FREQ batches if interleaving training and testing
29 | if mix_train_test and batch % TEST_FREQ == 0:
30 | print("\n--- TESTING ---")
31 | agent.FLAGS.test = True
32 | num_episodes = num_test_episodes
33 |
34 | # Reset successful episode counter
35 | successful_episodes = 0
36 |
37 | for episode in range(num_episodes):
38 |
39 | print("\nBatch %d, Episode %d" % (batch, episode))
40 |
41 | # Train for an episode
42 | success = agent.train(env, episode)
43 |
44 | if success:
45 | print("Batch %d, Episode %d End Goal Achieved\n" % (batch, episode))
46 |
47 | # Increment successful episode counter if applicable
48 | if mix_train_test and batch % TEST_FREQ == 0:
49 | successful_episodes += 1
50 |
51 | # Save agent
52 | agent.save_model(episode)
53 |
55 | # Finish evaluating policy if this batch was a test batch
55 | if mix_train_test and batch % TEST_FREQ == 0:
56 |
57 | # Log performance
58 | success_rate = successful_episodes / num_test_episodes * 100
59 | print("\nTesting Success Rate %.2f%%" % success_rate)
60 | agent.log_performance(success_rate)
61 | agent.FLAGS.test = False
62 |
63 | print("\n--- END TESTING ---\n")
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow.compat.v1 as tf
2 | tf.disable_v2_behavior()
3 |
4 | def layer(input_layer, num_next_neurons, is_output=False):
5 | num_prev_neurons = int(input_layer.shape[1])
6 | shape = [num_prev_neurons, num_next_neurons]
7 |
8 | if is_output:
9 | weight_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
10 | bias_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
11 | else:
12 | # 1/sqrt(f)
13 | fan_in_init = 1 / num_prev_neurons ** 0.5
14 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
15 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
16 |
17 | weights = tf.get_variable("weights", shape, initializer=weight_init)
18 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
19 |
20 | dot = tf.matmul(input_layer, weights) + biases
21 |
22 | if is_output:
23 | return dot
24 |
25 | relu = tf.nn.relu(dot)
26 | return relu
27 |
28 | def layer_goal_nn(input_layer, num_next_neurons, is_output=False):
29 | num_prev_neurons = int(input_layer.shape[1])
30 | shape = [num_prev_neurons, num_next_neurons]
31 |
32 |
33 | fan_in_init = 1 / num_prev_neurons ** 0.5
34 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
35 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init)
36 |
37 | weights = tf.get_variable("weights", shape, initializer=weight_init)
38 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init)
39 |
40 | dot = tf.matmul(input_layer, weights) + biases
41 |
42 | if is_output:
43 | return dot
44 |
45 | relu = tf.nn.relu(dot)
46 | return relu
47 |
48 |
49 | # Below function prints out options and environment specified by user
50 | def print_summary(FLAGS,env):
51 |
52 | print("\n- - - - - - - - - - -")
53 | print("Task Summary: ","\n")
54 | print("Environment: ", env.name)
55 | print("Number of Layers: ", FLAGS.layers)
56 | print("Time Limit per Layer: ", FLAGS.time_scale)
57 | print("Max Episode Time Steps: ", env.max_actions)
58 | print("Retrain: ", FLAGS.retrain)
59 | print("Test: ", FLAGS.test)
60 | print("Visualize: ", FLAGS.show)
61 | print("- - - - - - - - - - -", "\n\n")
62 |
63 |
64 | # Below function ensures environment configurations were properly entered
65 | def check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds, initial_state_space, subgoal_bounds, subgoal_thresholds, max_actions, timesteps_per_action):
66 |
67 | # Ensure model file is an ".xml" file
68 | assert model_name[-4:] == ".xml", "Mujoco model must be an \".xml\" file"
69 |
70 | # Ensure upper bound of each range is >= its lower bound
71 | if goal_space_train is not None:
72 | for i in range(len(goal_space_train)):
73 | assert goal_space_train[i][1] >= goal_space_train[i][0], "In the training goal space, upper bound must be >= lower bound"
74 |
75 | if goal_space_test is not None:
76 | for i in range(len(goal_space_test)):
77 | assert goal_space_test[i][1] >= goal_space_test[i][0], "In the testing goal space, upper bound must be >= lower bound"
78 |
79 | for i in range(len(initial_state_space)):
80 | assert initial_state_space[i][1] >= initial_state_space[i][0], "In initial state space, upper bound must be >= lower bound"
81 |
82 | for i in range(len(subgoal_bounds)):
83 | assert subgoal_bounds[i][1] >= subgoal_bounds[i][0], "In subgoal space, upper bound must be >= lower bound"
84 |
85 | # Make sure end goal spaces and thresholds have same first dimension
86 | if goal_space_train is not None and goal_space_test is not None:
87 | assert len(goal_space_train) == len(goal_space_test) == len(end_goal_thresholds), "End goal space and thresholds must have same first dimension"
88 |
89 | # Make sure subgoal spaces and thresholds have same dimensions
90 | assert len(subgoal_bounds) == len(subgoal_thresholds), "Subgoal space and thresholds must have same first dimension"
91 |
92 | # Ensure max_actions and timesteps_per_action are positive integers
93 | assert max_actions > 0, "Max actions should be a positive integer"
94 |
95 | assert timesteps_per_action > 0, "Timesteps per action should be a positive integer"
96 |
97 |
98 |
99 |
--------------------------------------------------------------------------------