├── .DS_Store ├── LICENSE ├── README.md ├── actor.py ├── agent.py ├── ant_environments ├── .DS_Store ├── ant_four_rooms_2_levels │ ├── README.md │ ├── actor.py │ ├── agent.py │ ├── critic.py │ ├── design_agent_and_env.py │ ├── environment.py │ ├── experience_buffer.py │ ├── initialize_HAC.py │ ├── layer.py │ ├── models │ │ ├── HAC.ckpt-99.data-00000-of-00001 │ │ ├── HAC.ckpt-99.index │ │ ├── HAC.ckpt-99.meta │ │ └── checkpoint │ ├── mujoco_files │ │ ├── ant_four_rooms.xml │ │ ├── ant_reacher.xml │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── materials.xml │ │ │ ├── skybox.xml │ │ │ └── visual.xml │ │ ├── muj_gripper │ │ │ ├── c_base.stl │ │ │ ├── c_forearm.stl │ │ │ ├── c_robotiq_85_gripper_joint_3_L.stl │ │ │ ├── c_robotiq_85_gripper_joint_3_R.stl │ │ │ ├── c_shoulder.stl │ │ │ ├── c_upperarm.stl │ │ │ ├── c_wrist1.stl │ │ │ ├── c_wrist2.stl │ │ │ ├── c_wrist3.stl │ │ │ ├── glass_cup.stl │ │ │ ├── glass_cup_2.stl │ │ │ ├── glass_cup_3.stl │ │ │ ├── inner_finger_coarse.stl │ │ │ ├── inner_finger_fine.stl │ │ │ ├── inner_knuckle_coarse.stl │ │ │ ├── inner_knuckle_fine.stl │ │ │ ├── new_solo_cup.stl │ │ │ ├── outer_finger_coarse.stl │ │ │ ├── outer_finger_fine.stl │ │ │ ├── outer_knuckle_coarse.stl │ │ │ ├── outer_knuckle_fine.stl │ │ │ ├── red_solo_cup.stl │ │ │ ├── robotiq_85_base_link_coarse.stl │ │ │ ├── robotiq_85_base_link_fine.stl │ │ │ ├── smaller_solo_cup.stl │ │ │ ├── solo_cup.stl │ │ │ ├── upd_solo_cup.stl │ │ │ ├── v_base.stl │ │ │ ├── v_forearm.stl │ │ │ ├── v_robotiq_85_gripper_joint_3_L.stl │ │ │ ├── v_robotiq_85_gripper_joint_3_R.stl │ │ │ ├── v_shoulder.stl │ │ │ ├── v_upperarm.stl │ │ │ ├── v_wrist1.stl │ │ │ ├── v_wrist2.stl │ │ │ └── v_wrist3.stl │ │ ├── pendulum.xml │ │ └── ur5.xml │ ├── options.py │ ├── performance_log.p │ ├── run_HAC.py │ └── utils.py ├── ant_four_rooms_3_levels │ ├── README.md │ ├── actor.py │ ├── agent.py │ ├── critic.py │ ├── design_agent_and_env.py │ ├── environment.py │ ├── experience_buffer.py │ ├── initialize_HAC.py │ ├── layer.py │ ├── mujoco_files │ │ ├── ant_four_rooms.xml │ │ ├── ant_reacher.xml │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── materials.xml │ │ │ ├── skybox.xml │ │ │ └── visual.xml │ │ ├── muj_gripper │ │ │ ├── c_base.stl │ │ │ ├── c_forearm.stl │ │ │ ├── c_robotiq_85_gripper_joint_3_L.stl │ │ │ ├── c_robotiq_85_gripper_joint_3_R.stl │ │ │ ├── c_shoulder.stl │ │ │ ├── c_upperarm.stl │ │ │ ├── c_wrist1.stl │ │ │ ├── c_wrist2.stl │ │ │ ├── c_wrist3.stl │ │ │ ├── glass_cup.stl │ │ │ ├── glass_cup_2.stl │ │ │ ├── glass_cup_3.stl │ │ │ ├── inner_finger_coarse.stl │ │ │ ├── inner_finger_fine.stl │ │ │ ├── inner_knuckle_coarse.stl │ │ │ ├── inner_knuckle_fine.stl │ │ │ ├── new_solo_cup.stl │ │ │ ├── outer_finger_coarse.stl │ │ │ ├── outer_finger_fine.stl │ │ │ ├── outer_knuckle_coarse.stl │ │ │ ├── outer_knuckle_fine.stl │ │ │ ├── red_solo_cup.stl │ │ │ ├── robotiq_85_base_link_coarse.stl │ │ │ ├── robotiq_85_base_link_fine.stl │ │ │ ├── smaller_solo_cup.stl │ │ │ ├── solo_cup.stl │ │ │ ├── upd_solo_cup.stl │ │ │ ├── v_base.stl │ │ │ ├── v_forearm.stl │ │ │ ├── v_robotiq_85_gripper_joint_3_L.stl │ │ │ ├── v_robotiq_85_gripper_joint_3_R.stl │ │ │ ├── v_shoulder.stl │ │ │ ├── v_upperarm.stl │ │ │ ├── v_wrist1.stl │ │ │ ├── v_wrist2.stl │ │ │ └── v_wrist3.stl │ │ ├── pendulum.xml │ │ └── ur5.xml │ ├── options.py │ ├── run_HAC.py │ └── utils.py ├── ant_reacher_2_levels │ ├── .DS_Store │ ├── README.md │ ├── __pycache__ │ │ ├── actor.cpython-37.pyc │ │ ├── agent.cpython-37.pyc │ │ ├── critic.cpython-37.pyc │ │ ├── 
design_agent_and_env.cpython-37.pyc │ │ ├── environment.cpython-37.pyc │ │ ├── experience_buffer.cpython-37.pyc │ │ ├── layer.cpython-37.pyc │ │ ├── options.cpython-37.pyc │ │ ├── run_HAC.cpython-37.pyc │ │ └── utils.cpython-37.pyc │ ├── actor.py │ ├── agent.py │ ├── critic.py │ ├── design_agent_and_env.py │ ├── environment.py │ ├── experience_buffer.py │ ├── initialize_HAC.py │ ├── layer.py │ ├── models │ │ ├── HAC.ckpt-99.data-00000-of-00001 │ │ ├── HAC.ckpt-99.index │ │ ├── HAC.ckpt-99.meta │ │ └── checkpoint │ ├── mujoco_files │ │ ├── ant_reacher.xml │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── materials.xml │ │ │ ├── skybox.xml │ │ │ └── visual.xml │ │ ├── muj_gripper │ │ │ ├── c_base.stl │ │ │ ├── c_forearm.stl │ │ │ ├── c_robotiq_85_gripper_joint_3_L.stl │ │ │ ├── c_robotiq_85_gripper_joint_3_R.stl │ │ │ ├── c_shoulder.stl │ │ │ ├── c_upperarm.stl │ │ │ ├── c_wrist1.stl │ │ │ ├── c_wrist2.stl │ │ │ ├── c_wrist3.stl │ │ │ ├── glass_cup.stl │ │ │ ├── glass_cup_2.stl │ │ │ ├── glass_cup_3.stl │ │ │ ├── inner_finger_coarse.stl │ │ │ ├── inner_finger_fine.stl │ │ │ ├── inner_knuckle_coarse.stl │ │ │ ├── inner_knuckle_fine.stl │ │ │ ├── new_solo_cup.stl │ │ │ ├── outer_finger_coarse.stl │ │ │ ├── outer_finger_fine.stl │ │ │ ├── outer_knuckle_coarse.stl │ │ │ ├── outer_knuckle_fine.stl │ │ │ ├── red_solo_cup.stl │ │ │ ├── robotiq_85_base_link_coarse.stl │ │ │ ├── robotiq_85_base_link_fine.stl │ │ │ ├── smaller_solo_cup.stl │ │ │ ├── solo_cup.stl │ │ │ ├── upd_solo_cup.stl │ │ │ ├── v_base.stl │ │ │ ├── v_forearm.stl │ │ │ ├── v_robotiq_85_gripper_joint_3_L.stl │ │ │ ├── v_robotiq_85_gripper_joint_3_R.stl │ │ │ ├── v_shoulder.stl │ │ │ ├── v_upperarm.stl │ │ │ ├── v_wrist1.stl │ │ │ ├── v_wrist2.stl │ │ │ └── v_wrist3.stl │ │ ├── pendulum.xml │ │ └── ur5.xml │ ├── options.py │ ├── run_HAC.py │ └── utils.py └── ant_reacher_3_levels │ ├── README.md │ ├── __pycache__ │ ├── actor.cpython-37.pyc │ ├── agent.cpython-37.pyc │ ├── critic.cpython-37.pyc │ ├── design_agent_and_env.cpython-37.pyc │ ├── environment.cpython-37.pyc │ ├── experience_buffer.cpython-37.pyc │ ├── layer.cpython-37.pyc │ ├── options.cpython-37.pyc │ ├── run_HAC.cpython-37.pyc │ └── utils.cpython-37.pyc │ ├── actor.py │ ├── agent.py │ ├── critic.py │ ├── design_agent_and_env.py │ ├── environment.py │ ├── experience_buffer.py │ ├── initialize_HAC.py │ ├── layer.py │ ├── models │ ├── HAC.ckpt-99.data-00000-of-00001 │ ├── HAC.ckpt-99.index │ ├── HAC.ckpt-99.meta │ └── checkpoint │ ├── mujoco_files │ ├── ant_reacher.xml │ ├── common │ │ ├── __init__.py │ │ ├── materials.xml │ │ ├── skybox.xml │ │ └── visual.xml │ ├── muj_gripper │ │ ├── c_base.stl │ │ ├── c_forearm.stl │ │ ├── c_robotiq_85_gripper_joint_3_L.stl │ │ ├── c_robotiq_85_gripper_joint_3_R.stl │ │ ├── c_shoulder.stl │ │ ├── c_upperarm.stl │ │ ├── c_wrist1.stl │ │ ├── c_wrist2.stl │ │ ├── c_wrist3.stl │ │ ├── glass_cup.stl │ │ ├── glass_cup_2.stl │ │ ├── glass_cup_3.stl │ │ ├── inner_finger_coarse.stl │ │ ├── inner_finger_fine.stl │ │ ├── inner_knuckle_coarse.stl │ │ ├── inner_knuckle_fine.stl │ │ ├── new_solo_cup.stl │ │ ├── outer_finger_coarse.stl │ │ ├── outer_finger_fine.stl │ │ ├── outer_knuckle_coarse.stl │ │ ├── outer_knuckle_fine.stl │ │ ├── red_solo_cup.stl │ │ ├── robotiq_85_base_link_coarse.stl │ │ ├── robotiq_85_base_link_fine.stl │ │ ├── smaller_solo_cup.stl │ │ ├── solo_cup.stl │ │ ├── upd_solo_cup.stl │ │ ├── v_base.stl │ │ ├── v_forearm.stl │ │ ├── v_robotiq_85_gripper_joint_3_L.stl │ │ ├── v_robotiq_85_gripper_joint_3_R.stl │ │ ├── v_shoulder.stl │ │ ├── 
v_upperarm.stl │ │ ├── v_wrist1.stl │ │ ├── v_wrist2.stl │ │ └── v_wrist3.stl │ ├── pendulum.xml │ └── ur5.xml │ ├── options.py │ ├── run_HAC.py │ └── utils.py ├── critic.py ├── design_agent_and_env.py ├── environment.py ├── example_designs ├── PENDULUM_LAY_1_design_agent_and_env.py ├── PENDULUM_LAY_2_design_agent_and_env.py ├── PENDULUM_LAY_3_design_agent_and_env.py ├── UR5_LAY_1_design_agent_and_env.py ├── UR5_LAY_2_design_agent_and_env.py └── UR5_LAY_3_design_agent_and_env.py ├── experience_buffer.py ├── initialize_HAC.py ├── layer.py ├── mujoco_files ├── common │ ├── __init__.py │ ├── materials.xml │ ├── skybox.xml │ └── visual.xml ├── muj_gripper │ ├── c_base.stl │ ├── c_forearm.stl │ ├── c_robotiq_85_gripper_joint_3_L.stl │ ├── c_robotiq_85_gripper_joint_3_R.stl │ ├── c_shoulder.stl │ ├── c_upperarm.stl │ ├── c_wrist1.stl │ ├── c_wrist2.stl │ ├── c_wrist3.stl │ ├── glass_cup.stl │ ├── glass_cup_2.stl │ ├── glass_cup_3.stl │ ├── inner_finger_coarse.stl │ ├── inner_finger_fine.stl │ ├── inner_knuckle_coarse.stl │ ├── inner_knuckle_fine.stl │ ├── new_solo_cup.stl │ ├── outer_finger_coarse.stl │ ├── outer_finger_fine.stl │ ├── outer_knuckle_coarse.stl │ ├── outer_knuckle_fine.stl │ ├── red_solo_cup.stl │ ├── robotiq_85_base_link_coarse.stl │ ├── robotiq_85_base_link_fine.stl │ ├── smaller_solo_cup.stl │ ├── solo_cup.stl │ ├── upd_solo_cup.stl │ ├── v_base.stl │ ├── v_forearm.stl │ ├── v_robotiq_85_gripper_joint_3_L.stl │ ├── v_robotiq_85_gripper_joint_3_R.stl │ ├── v_shoulder.stl │ ├── v_upperarm.stl │ ├── v_wrist1.stl │ ├── v_wrist2.stl │ └── v_wrist3.stl ├── pendulum.xml └── ur5.xml ├── options.py ├── run_HAC.py └── utils.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/.DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 andrew-j-levy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical Actor-Critc (HAC) 2 | This repository contains the code to implement the *Hierarchical Actor-Critic (HAC)* algorithm. 
HAC helps agents learn tasks more quickly by enabling them to break problems down into short sequences of actions. For more information on the algorithm, please see our ICLR 2019 [paper](https://openreview.net/pdf?id=ryzECoAcY7) and [blog post](http://bigai.cs.brown.edu/2019/09/03/hac.html). 3 | 4 | To run HAC, execute the command *"python3 initialize_HAC.py --retrain"*. By default, this will train a UR5 agent with a 3-level hierarchy to learn to achieve certain poses. This UR5 agent should achieve a 90+% success rate in around 350 episodes. The following [video](https://www.youtube.com/watch?v=R86Vs9Vb6Bc) shows how a 3-layered agent performed after 450 episodes of training. In order to watch your trained agent, execute the command *"python3 initialize_HAC.py --test --show"*. To train agents in the inverted pendulum domain, swap the UR5 reacher *"design_agent_and_env.py"* file for an inverted pendulum *"design_agent_and_env.py"* file; both are located in the *"example_designs"* folder. To train agents in the ant reacher and ant four rooms environments, execute the command *"python3 initialize_HAC.py --retrain"* in the appropriate folder within the *ant_environments* directory. In the near future, the code for the ant domains will be integrated with the code for the other domains. 5 | 6 | Please note that in order to run this repository, you must have (i) a MuJoCo [license](https://www.roboti.us/license.html), (ii) the required MuJoCo software [libraries](https://www.roboti.us/index.html), and (iii) the MuJoCo Python [wrapper](https://github.com/openai/mujoco-py) from OpenAI. 7 | 8 | Happy to answer any questions you have. Please email me at andrew_levy2@brown.edu. 9 | 10 | ## UPDATE LOG 11 | 12 | ### 5/20/2020 - Key Changes 13 | 14 | 1. Added 2-level ant environments 15 | 2. Centralized exploration hyperparameters for ant environments in *design_agent_and_env.py* file 16 | 17 | ### 2/25/2020 - Key Changes 18 | 19 | 1. TensorFlow 2.x Compatible 20 | 21 | 2. Fine-tuned exploration parameters of the Ant Reacher environment 22 | 23 | ### 10/1/2019 - Key Changes 24 | 25 | 1. Added Ant Reacher and Ant Four Rooms Environments 26 | 27 | The code for the ant environments has been temporarily added to the *ant_environments* folder. In the near future, the code for the ant domains will be integrated with the code for the other domains. Only minimal changes to the code are needed to run the ant environments. 28 | 29 | ### 10/12/2018 - Key Changes 30 | 1. Bounded Q-Values 31 | 32 | The Q-values output by the critic network at each level are now bounded between *[-T,0]*, where *T* is the max sequence length in which each policy specializes as well as the negative of the subgoal penalty. We use an upper bound of 0 because our code uses a nonpositive reward function. Consequently, Q-values should never be positive. However, we noticed that sometimes the critic function approximator would make small mistakes and assign positive Q-values, which occasionally proved harmful to results. In addition, we observed improved results when we used a tighter lower bound of *-T* (i.e., the subgoal penalty). The improved results may result from the increased flexibility that the bounded Q-values provide the critic. The critic can assign a value of *-T* to any (state,action,goal) tuple in which the action does not bring the agent close to the goal, instead of having to learn the exact value. 33 |
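As a concrete illustration of this bound (a minimal sketch only, with hypothetical helper names, not the exact code in *"critic.py"*), an unbounded critic output can be squashed into *[-T,0]* with a scaled sigmoid, and the bootstrapped Bellman target can be clipped to the same range:

```python
import numpy as np

def bound_q(raw_output, T):
    # Illustrative sketch only (not the repository's critic.py implementation).
    # The sigmoid maps the raw value to (0, 1); scaling by -T keeps every
    # Q-value nonpositive and no smaller than the subgoal penalty -T.
    return -T / (1.0 + np.exp(-raw_output))

def bellman_target(reward, q_next, T):
    # Clip the bootstrapped target (reward + Q(next state, pi(next state), goal))
    # into the same [-T, 0] interval so the critic is never trained toward a
    # positive value or one below the subgoal penalty.
    return np.clip(reward + q_next, -T, 0.0)
```

34 | 2. 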
Removed Target Networks 35 | 36 | We also noticed improved results when we used the regular Q-networks to determine the Bellman target updates (i.e., *reward + Q(next state,pi(next state),goal)*) instead of the separate target networks that are used in DDPG. The default setting of our code base thus no longer uses target networks. However, the target networks can be easily activated by making the changes specified in (i) the *"learn"* method in the *"layer.py"* file and (ii) the *"update"* method in the *"critic.py"* file. 37 | 38 | 3. Centralized Design Template 39 | 40 | Users can now configure the agent and environment in the single file, *"design_agent_and_env.py"*. This template file contains most of the significant hyperparameters in HAC. We have removed the command-line options that can change the architecture of the agent's hierarchy. 41 | 42 | 4. Added UR5 Reacher Environment 43 | 44 | We have added a new UR5 reacher environment, in which a UR5 agent can learn to achieve various poses. The *"ur5.xml"* MuJoCo file also contains commented code for a Robotiq gripper if you would like to augment the agent. Additional environments will hopefully be added shortly. 45 | -------------------------------------------------------------------------------- /actor.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | import numpy as np 4 | from utils import layer 5 | 6 | 7 | class Actor(): 8 | 9 | def __init__(self, 10 | sess, 11 | env, 12 | batch_size, 13 | layer_number, 14 | FLAGS, 15 | learning_rate=0.001, 16 | tau=0.05): 17 | 18 | self.sess = sess 19 | 20 | # Determine range of actor network outputs. This will be used to configure outer layer of neural network 21 | if layer_number == 0: 22 | self.action_space_bounds = env.action_bounds 23 | self.action_offset = env.action_offset 24 | else: 25 | # Determine symmetric range of subgoal space and offset 26 | self.action_space_bounds = env.subgoal_bounds_symmetric 27 | self.action_offset = env.subgoal_bounds_offset 28 | 29 | # Dimensions of action will depend on layer level 30 | if layer_number == 0: 31 | self.action_space_size = env.action_dim 32 | else: 33 | self.action_space_size = env.subgoal_dim 34 | 35 | self.actor_name = 'actor_' + str(layer_number) 36 | 37 | # Dimensions of goal placeholder will differ depending on layer level 38 | if layer_number == FLAGS.layers - 1: 39 | self.goal_dim = env.end_goal_dim 40 | else: 41 | self.goal_dim = env.subgoal_dim 42 | 43 | self.state_dim = env.state_dim 44 | 45 | self.learning_rate = learning_rate 46 | # self.exploration_policies = exploration_policies 47 | self.tau = tau 48 | self.batch_size = batch_size 49 | 50 | self.state_ph = tf.placeholder(tf.float32, shape=(None, self.state_dim)) 51 | self.goal_ph = tf.placeholder(tf.float32, shape=(None, self.goal_dim)) 52 | self.features_ph = tf.concat([self.state_ph, self.goal_ph], axis=1) 53 | 54 | # Create actor network 55 | self.infer = self.create_nn(self.features_ph) 56 | 57 | # Target network code "repurposed" from Patrick Emani :^) 58 | self.weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name] 59 | # self.num_weights = len(self.weights) 60 | 61 | # Create target actor network 62 | self.target = self.create_nn(self.features_ph, name = self.actor_name + '_target') 63 | self.target_weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name][len(self.weights):] 64 | 65 | self.update_target_weights = \ 66 | 
[self.target_weights[i].assign(tf.multiply(self.weights[i], self.tau) + 67 | tf.multiply(self.target_weights[i], 1. - self.tau)) 68 | for i in range(len(self.target_weights))] 69 | 70 | self.action_derivs = tf.placeholder(tf.float32, shape=(None, self.action_space_size)) 71 | self.unnormalized_actor_gradients = tf.gradients(self.infer, self.weights, -self.action_derivs) 72 | self.policy_gradient = list(map(lambda x: tf.div(x, self.batch_size), self.unnormalized_actor_gradients)) 73 | 74 | # self.policy_gradient = tf.gradients(self.infer, self.weights, -self.action_derivs) 75 | self.train = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(self.policy_gradient, self.weights)) 76 | 77 | 78 | def get_action(self, state, goal): 79 | actions = self.sess.run(self.infer, 80 | feed_dict={ 81 | self.state_ph: state, 82 | self.goal_ph: goal 83 | }) 84 | 85 | return actions 86 | 87 | def get_target_action(self, state, goal): 88 | actions = self.sess.run(self.target, 89 | feed_dict={ 90 | self.state_ph: state, 91 | self.goal_ph: goal 92 | }) 93 | 94 | return actions 95 | 96 | def update(self, state, goal, action_derivs): 97 | weights, policy_grad, _ = self.sess.run([self.weights, self.policy_gradient, self.train], 98 | feed_dict={ 99 | self.state_ph: state, 100 | self.goal_ph: goal, 101 | self.action_derivs: action_derivs 102 | }) 103 | 104 | return len(weights) 105 | 106 | # self.sess.run(self.update_target_weights) 107 | 108 | # def create_nn(self, state, goal, name='actor'): 109 | def create_nn(self, features, name=None): 110 | 111 | if name is None: 112 | name = self.actor_name 113 | 114 | with tf.variable_scope(name + '_fc_1'): 115 | fc1 = layer(features, 64) 116 | with tf.variable_scope(name + '_fc_2'): 117 | fc2 = layer(fc1, 64) 118 | with tf.variable_scope(name + '_fc_3'): 119 | fc3 = layer(fc2, 64) 120 | with tf.variable_scope(name + '_fc_4'): 121 | fc4 = layer(fc3, self.action_space_size, is_output=True) 122 | 123 | output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset 124 | 125 | return output 126 | 127 | 128 | -------------------------------------------------------------------------------- /ant_environments/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/.DS_Store -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical Actor-Critc (HAC) 2 | This repository contains the code to implement the *Hierarchical Actor-Critic (HAC)* algorithm. HAC helps agents learn tasks more quickly by enabling them to break problems down into short sequences of actions. 3 | 4 | To run HAC, execute the command *"python3 initialize_HAC.py --retrain"*. By default, this will train a UR5 agent with a 3-level hierarchy to learn to achieve certain poses. This UR5 agent should achieve a 90+% success rate in around 350 episodes. The following [video](https://www.youtube.com/watch?v=R86Vs9Vb6Bc) shows how a 3-layered agent performed after 450 episodes of training. In order to watch your trained agent, execute the command *"python3 initialize_HAC.py --test --show"*. 
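For reference, the *"initialize_HAC.py"* script that these commands invoke is only a thin entry point (its full source appears later in this listing): it parses the command-line flags, builds the agent and environment from the design template, and hands both to the training loop. A condensed sketch:

```python
# Condensed from initialize_HAC.py (the full file appears later in this listing).
from design_agent_and_env import design_agent_and_env
from options import parse_options
from run_HAC import run_HAC

FLAGS = parse_options()                   # command-line flags such as --retrain, --test, --show
agent, env = design_agent_and_env(FLAGS)  # build the agent hierarchy and the MuJoCo environment
run_HAC(FLAGS, env, agent)                # alternate exploration and testing phases
```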
Please note that in order to run this repository, you must have (i) a MuJoCo [license](https://www.roboti.us/license.html), (ii) the required MuJoCo software [libraries](https://www.roboti.us/index.html), and (iii) the MuJoCo Python [wrapper](https://github.com/openai/mujoco-py) from OpenAI. 5 | 6 | To run HAC with your own agents and MuJoCo environments, you need to complete the template in the *"design_agent_and_env.py"* file. The *"example_designs"* folder contains other examples of design templates that build different agents in the UR5 reacher and inverted pendulum environments. 7 | 8 | Happy to answer any questions you have. Please email me at andrew_levy2@brown.edu. 9 | 10 | ## UPDATE LOG 11 | 12 | ### 10/12/2018 - Key Changes 13 | 1. Bounded Q-Values 14 | 15 | The Q-values output by the critic network at each level are now bounded between *[-T,0]*, in which *T* is the max sequence length in which each policy specializes as well as the negative of the subgoal penalty. We use an upper bound of 0 because our code uses a nonpositive reward function. Consequently, Q-values should never be positive. However, we noticed that somtimes the critic function approximator would make small mistakes and assign positive Q-values, which occassionally proved harmful to results. In addition, we observed improved results when we used a tighter lower bound of *-T* (i.e., the subgoal penalty). The improved results may result from the increased flexibility the bounded Q-values provides the critic. The critic can assign a value of *-T* to any (state,action,goal) tuple, in which the action does not bring the agent close to the goal, instead of having to learn the exact value. 16 | 17 | 2. Removed Target Networks 18 | 19 | We also noticed improved results when we used the regular Q-networks to determine the Bellman target updates (i.e., *reward + Q(next state,pi(next state),goal)*) instead of the separate target networks that are used in DDPG. The default setting of our code base thus no longer uses target networks. However, the target networks can be easily activated by making the changes specified in (i) the *"learn"* method in the *"layer.py"* file and (ii) the *"update"* method in the *"critic.py"* file. 20 | 21 | 3. Centralized Design Template 22 | 23 | Users can now configure the agent and environment in the single file, *"design_agent_and_env.py"*. This template file contains most of the significant hyperparameters in HAC. We have removed the command-line options that can change the architecture of the agent's hierarchy. 24 | 25 | 4. Added UR5 Reacher Environment 26 | 27 | We have added a new UR5 reacher environment, in which a UR5 agent can learn to achieve various poses. The *"ur5.xml"* MuJoCo file also contains commented code for a Robotiq gripper if you would like to augment the agent. Additional environments will hopefully be added shortly. 28 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/actor.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | import numpy as np 4 | from utils import layer 5 | 6 | 7 | class Actor(): 8 | 9 | def __init__(self, 10 | sess, 11 | env, 12 | batch_size, 13 | layer_number, 14 | FLAGS, 15 | learning_rate=0.001, 16 | tau=0.05): 17 | 18 | self.sess = sess 19 | 20 | # Determine range of actor network outputs. 
This will be used to configure outer layer of neural network 21 | if layer_number == 0: 22 | self.action_space_bounds = env.action_bounds 23 | self.action_offset = env.action_offset 24 | else: 25 | # Determine symmetric range of subgoal space and offset 26 | self.action_space_bounds = env.subgoal_bounds_symmetric 27 | self.action_offset = env.subgoal_bounds_offset 28 | 29 | # Dimensions of action will depend on layer level 30 | if layer_number == 0: 31 | self.action_space_size = env.action_dim 32 | else: 33 | self.action_space_size = env.subgoal_dim 34 | 35 | self.actor_name = 'actor_' + str(layer_number) 36 | 37 | # Dimensions of goal placeholder will differ depending on layer level 38 | if layer_number == FLAGS.layers - 1: 39 | self.goal_dim = env.end_goal_dim 40 | else: 41 | self.goal_dim = env.subgoal_dim 42 | 43 | self.state_dim = env.state_dim 44 | 45 | self.learning_rate = learning_rate 46 | # self.exploration_policies = exploration_policies 47 | self.tau = tau 48 | # self.batch_size = batch_size 49 | self.batch_size = tf.placeholder(tf.float32) 50 | 51 | self.state_ph = tf.placeholder(tf.float32, shape=(None, self.state_dim)) 52 | self.goal_ph = tf.placeholder(tf.float32, shape=(None, self.goal_dim)) 53 | self.features_ph = tf.concat([self.state_ph, self.goal_ph], axis=1) 54 | 55 | # Create actor network 56 | self.infer = self.create_nn(self.features_ph) 57 | 58 | # Target network code "repurposed" from Patrick Emani :^) 59 | self.weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name] 60 | # self.num_weights = len(self.weights) 61 | 62 | # Create target actor network 63 | self.target = self.create_nn(self.features_ph, name = self.actor_name + '_target') 64 | self.target_weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name][len(self.weights):] 65 | 66 | self.update_target_weights = \ 67 | [self.target_weights[i].assign(tf.multiply(self.weights[i], self.tau) + 68 | tf.multiply(self.target_weights[i], 1. 
- self.tau)) 69 | for i in range(len(self.target_weights))] 70 | 71 | self.action_derivs = tf.placeholder(tf.float32, shape=(None, self.action_space_size)) 72 | self.unnormalized_actor_gradients = tf.gradients(self.infer, self.weights, -self.action_derivs) 73 | self.policy_gradient = list(map(lambda x: tf.div(x, self.batch_size), self.unnormalized_actor_gradients)) 74 | 75 | # self.policy_gradient = tf.gradients(self.infer, self.weights, -self.action_derivs) 76 | self.train = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(self.policy_gradient, self.weights)) 77 | 78 | 79 | def get_action(self, state, goal): 80 | actions = self.sess.run(self.infer, 81 | feed_dict={ 82 | self.state_ph: state, 83 | self.goal_ph: goal 84 | }) 85 | 86 | return actions 87 | 88 | def get_target_action(self, state, goal): 89 | actions = self.sess.run(self.target, 90 | feed_dict={ 91 | self.state_ph: state, 92 | self.goal_ph: goal 93 | }) 94 | 95 | return actions 96 | 97 | def update(self, state, goal, action_derivs, next_batch_size): 98 | weights, policy_grad, _ = self.sess.run([self.weights, self.policy_gradient, self.train], 99 | feed_dict={ 100 | self.state_ph: state, 101 | self.goal_ph: goal, 102 | self.action_derivs: action_derivs, 103 | self.batch_size: next_batch_size 104 | }) 105 | 106 | return len(weights) 107 | 108 | # self.sess.run(self.update_target_weights) 109 | 110 | # def create_nn(self, state, goal, name='actor'): 111 | def create_nn(self, features, name=None): 112 | 113 | if name is None: 114 | name = self.actor_name 115 | 116 | with tf.variable_scope(name + '_fc_1'): 117 | fc1 = layer(features, 64) 118 | with tf.variable_scope(name + '_fc_2'): 119 | fc2 = layer(fc1, 64) 120 | with tf.variable_scope(name + '_fc_3'): 121 | fc3 = layer(fc2, 64) 122 | with tf.variable_scope(name + '_fc_4'): 123 | fc4 = layer(fc3, self.action_space_size, is_output=True) 124 | 125 | output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset 126 | 127 | return output 128 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/experience_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class ExperienceBuffer(): 4 | 5 | def __init__(self, max_buffer_size, batch_size): 6 | self.size = 0 7 | self.max_buffer_size = max_buffer_size 8 | self.experiences = [] 9 | self.batch_size = batch_size 10 | 11 | def add(self, experience): 12 | assert len(experience) == 7, 'Experience must be of form (s, a, r, s, g, t, grip_info\')' 13 | assert type(experience[5]) == bool 14 | 15 | self.experiences.append(experience) 16 | self.size += 1 17 | 18 | # If replay buffer is filled, remove a percentage of replay buffer. 
Only removing a single transition slows down performance 19 | if self.size >= self.max_buffer_size: 20 | beg_index = int(np.floor(self.max_buffer_size/6)) 21 | self.experiences = self.experiences[beg_index:] 22 | self.size -= beg_index 23 | 24 | def get_batch(self): 25 | states, actions, rewards, new_states, goals, is_terminals = [], [], [], [], [], [] 26 | dist = np.random.randint(0, high=self.size, size=min(self.size, self.batch_size)) 27 | 28 | for i in dist: 29 | states.append(self.experiences[i][0]) 30 | actions.append(self.experiences[i][1]) 31 | rewards.append(self.experiences[i][2]) 32 | new_states.append(self.experiences[i][3]) 33 | goals.append(self.experiences[i][4]) 34 | is_terminals.append(self.experiences[i][5]) 35 | 36 | return states, actions, rewards, new_states, goals, is_terminals 37 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/initialize_HAC.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the starting file for the Hierarchical Actor-Critc (HAC) algorithm. The below script processes the command-line options specified 3 | by the user and instantiates the environment and agent. 4 | """ 5 | 6 | from design_agent_and_env import design_agent_and_env 7 | from options import parse_options 8 | from agent import Agent 9 | from run_HAC import run_HAC 10 | 11 | # Determine training options specified by user. The full list of available options can be found in "options.py" file. 12 | FLAGS = parse_options() 13 | 14 | # Instantiate the agent and Mujoco environment. The designer must assign values to the hyperparameters listed in the "design_agent_and_env.py" file. 15 | agent, env = design_agent_and_env(FLAGS) 16 | 17 | # Begin training 18 | run_HAC(FLAGS,env,agent) 19 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.data-00000-of-00001 -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.index -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/models/HAC.ckpt-99.meta -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/models/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "/home/tester/Documents/HAC/extra_copy/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_four_rooms_2_levels_2/models/HAC.ckpt-99" 2 | all_model_checkpoint_paths: 
"/home/tester/Documents/HAC/extra_copy/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_four_rooms_2_levels_2/models/HAC.ckpt-99" 3 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The dm_control Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """Functions to manage the common assets for domains.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | from dm_control.utils import resources 24 | 25 | _SUITE_DIR = os.path.dirname(os.path.dirname(__file__)) 26 | _FILENAMES = [ 27 | "common/materials.xml", 28 | "common/skybox.xml", 29 | "common/visual.xml", 30 | ] 31 | 32 | ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename)) 33 | for filename in _FILENAMES} 34 | 35 | 36 | def read_model(model_filename): 37 | """Reads a model XML file and returns its contents as a string.""" 38 | return resources.GetResource(os.path.join(_SUITE_DIR, model_filename)) 39 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/common/materials.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/common/skybox.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/common/visual.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_base.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_forearm.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_forearm.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_shoulder.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_shoulder.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_upperarm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_upperarm.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist1.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist2.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/c_wrist3.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup_2.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/glass_cup_3.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_finger_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_finger_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/new_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/new_solo_cup.stl -------------------------------------------------------------------------------- 
/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_finger_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_finger_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/red_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/red_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/upd_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/upd_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_base.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_forearm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_forearm.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_shoulder.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_shoulder.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_upperarm.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_upperarm.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist1.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist2.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/mujoco_files/muj_gripper/v_wrist3.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/mujoco_files/pendulum.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | """ 4 | Below are training options user can specify in command line. 5 | 6 | Options Include: 7 | 8 | 1. Retrain boolean ("--retrain") 9 | - If included, actor and critic neural network parameters are reset 10 | 11 | 2. Testing boolean ("--test") 12 | - If included, agent only uses greedy policy without noise. No changes are made to policy and neural networks. 13 | - If not included, periods of training are by default interleaved with periods of testing to evaluate progress. 14 | 15 | 3. Show boolean ("--show") 16 | - If included, training will be visualized 17 | 18 | 4. Train Only boolean ("--train_only") 19 | - If included, agent will be solely in training mode and will not interleave periods of training and testing 20 | 21 | 5. Verbosity boolean ("--verbose") 22 | - If included, summary of each transition will be printed 23 | 24 | 6. All Trans boolean ("--all_trans") 25 | - If included, all transitions including (i) hindsight action, (ii) subgoal penalty, (iii) preliminary HER, and (iv) final HER transitions will be printed. 
Use below options to print out specific types of transitions. 26 | 27 | 7. Hindsight Action trans boolean ("hind_action") 28 | - If included, prints hindsight actions transitions for each level 29 | 30 | 8. Subgoal Penalty trans ("penalty") 31 | - If included, prints the subgoal penalty transitions 32 | 33 | 9. Preliminary HER trans ("prelim_HER") 34 | -If included, prints the preliminary HER transitions (i.e., with TBD reward and goal components) 35 | 36 | 10. HER trans ("HER") 37 | - If included, prints the final HER transitions for each level 38 | 39 | 11. Show Q-values ("--Q_values") 40 | - Show Q-values for each action by each level 41 | 42 | """ 43 | 44 | def parse_options(): 45 | parser = argparse.ArgumentParser() 46 | 47 | parser.add_argument( 48 | '--retrain', 49 | action='store_true', 50 | help='Include to reset policy' 51 | ) 52 | 53 | parser.add_argument( 54 | '--test', 55 | action='store_true', 56 | help='Include to fix current policy' 57 | ) 58 | 59 | parser.add_argument( 60 | '--show', 61 | action='store_true', 62 | help='Include to visualize training' 63 | ) 64 | 65 | parser.add_argument( 66 | '--train_only', 67 | action='store_true', 68 | help='Include to use training mode only' 69 | ) 70 | 71 | parser.add_argument( 72 | '--verbose', 73 | action='store_true', 74 | help='Print summary of each transition' 75 | ) 76 | 77 | parser.add_argument( 78 | '--all_trans', 79 | action='store_true', 80 | help='Print summary of each transition' 81 | ) 82 | 83 | parser.add_argument( 84 | '--hind_action', 85 | action='store_true', 86 | help='Print summary of each transition' 87 | ) 88 | 89 | parser.add_argument( 90 | '--penalty', 91 | action='store_true', 92 | help='Print summary of each transition' 93 | ) 94 | 95 | parser.add_argument( 96 | '--prelim_HER', 97 | action='store_true', 98 | help='Print summary of each transition' 99 | ) 100 | 101 | parser.add_argument( 102 | '--HER', 103 | action='store_true', 104 | help='Print summary of each transition' 105 | ) 106 | 107 | parser.add_argument( 108 | '--Q_values', 109 | action='store_true', 110 | help='Print summary of each transition' 111 | ) 112 | 113 | FLAGS, unparsed = parser.parse_known_args() 114 | 115 | 116 | return FLAGS 117 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/performance_log.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_2_levels/performance_log.p -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/run_HAC.py: -------------------------------------------------------------------------------- 1 | """ 2 | "run_HAC.py" executes the training schedule for the agent. By default, the agent will alternate between exploration and testing phases. The number of episodes in the exploration phase can be configured in section 3 of "design_agent_and_env.py" file. If the user prefers to only explore or only test, the user can enter the command-line options ""--train_only" or "--test", respectively. The full list of command-line options is available in the "options.py" file. 
3 | """ 4 | 5 | import pickle as cpickle 6 | import agent as Agent 7 | from utils import print_summary 8 | 9 | NUM_BATCH = 1000 10 | TEST_FREQ = 2 11 | 12 | num_test_episodes = 100 13 | 14 | def run_HAC(FLAGS,env,agent): 15 | 16 | # Print task summary 17 | print_summary(FLAGS,env) 18 | 19 | total_episodes = 0 20 | 21 | # Determine training mode. If not testing and not solely training, interleave training and testing to track progress 22 | mix_train_test = False 23 | if not FLAGS.test and not FLAGS.train_only: 24 | mix_train_test = True 25 | 26 | for batch in range(NUM_BATCH): 27 | 28 | num_episodes = agent.other_params["num_exploration_episodes"] 29 | 30 | # Evaluate policy every TEST_FREQ batches if interleaving training and testing 31 | if mix_train_test and batch % TEST_FREQ == 0: 32 | print("\n--- TESTING ---") 33 | agent.FLAGS.test = True 34 | num_episodes = num_test_episodes 35 | 36 | # Reset successful episode counter 37 | successful_episodes = 0 38 | 39 | for episode in range(num_episodes): 40 | 41 | print("\nBatch %d, Episode %d" % (batch, episode)) 42 | 43 | # Train for an episode 44 | success = agent.train(env, episode, total_episodes) 45 | 46 | if success: 47 | print("Batch %d, Episode %d End Goal Achieved\n" % (batch, episode)) 48 | 49 | # Increment successful episode counter if applicable 50 | if mix_train_test and batch % TEST_FREQ == 0: 51 | successful_episodes += 1 52 | 53 | if FLAGS.train_only or (mix_train_test and batch % TEST_FREQ != 0): 54 | total_episodes += 1 55 | 56 | # Save agent 57 | agent.save_model(episode) 58 | 59 | # Finish evaluating policy if tested prior batch 60 | if mix_train_test and batch % TEST_FREQ == 0: 61 | 62 | # Log performance 63 | success_rate = successful_episodes / num_test_episodes * 100 64 | print("\nTesting Success Rate %.2f%%" % success_rate) 65 | agent.log_performance(success_rate) 66 | agent.FLAGS.test = False 67 | 68 | print("\n--- END TESTING ---\n") 69 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_2_levels/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | 4 | def layer(input_layer, num_next_neurons, is_output=False): 5 | num_prev_neurons = int(input_layer.shape[1]) 6 | shape = [num_prev_neurons, num_next_neurons] 7 | 8 | if is_output: 9 | weight_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3) 10 | bias_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3) 11 | else: 12 | # 1/sqrt(f) 13 | fan_in_init = 1 / num_prev_neurons ** 0.5 14 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 15 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 16 | 17 | weights = tf.get_variable("weights", shape, initializer=weight_init) 18 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init) 19 | 20 | dot = tf.matmul(input_layer, weights) + biases 21 | 22 | if is_output: 23 | return dot 24 | 25 | relu = tf.nn.relu(dot) 26 | return relu 27 | 28 | def layer_goal_nn(input_layer, num_next_neurons, is_output=False): 29 | num_prev_neurons = int(input_layer.shape[1]) 30 | shape = [num_prev_neurons, num_next_neurons] 31 | 32 | 33 | fan_in_init = 1 / num_prev_neurons ** 0.5 34 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 35 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 36 | 37 | weights = 
tf.get_variable("weights", shape, initializer=weight_init) 38 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init) 39 | 40 | dot = tf.matmul(input_layer, weights) + biases 41 | 42 | if is_output: 43 | return dot 44 | 45 | relu = tf.nn.relu(dot) 46 | return relu 47 | 48 | 49 | # Below function prints out options and environment specified by user 50 | def print_summary(FLAGS,env): 51 | 52 | print("\n- - - - - - - - - - -") 53 | print("Task Summary: ","\n") 54 | print("Environment: ", env.name) 55 | print("Number of Layers: ", FLAGS.layers) 56 | print("Time Limit per Layer: ", FLAGS.time_scale) 57 | print("Max Episode Time Steps: ", env.max_actions) 58 | print("Retrain: ", FLAGS.retrain) 59 | print("Test: ", FLAGS.test) 60 | print("Visualize: ", FLAGS.show) 61 | print("- - - - - - - - - - -", "\n\n") 62 | 63 | 64 | # Below function ensures environment configurations were properly entered 65 | def check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds, initial_state_space, subgoal_bounds, subgoal_thresholds, max_actions, timesteps_per_action): 66 | 67 | # Ensure model file is an ".xml" file 68 | assert model_name[-4:] == ".xml", "Mujoco model must be an \".xml\" file" 69 | 70 | # Ensure upper bounds of range is >= lower bound of range 71 | if goal_space_train is not None: 72 | for i in range(len(goal_space_train)): 73 | assert goal_space_train[i][1] >= goal_space_train[i][0], "In the training goal space, upper bound must be >= lower bound" 74 | 75 | if goal_space_test is not None: 76 | for i in range(len(goal_space_test)): 77 | assert goal_space_test[i][1] >= goal_space_test[i][0], "In the training goal space, upper bound must be >= lower bound" 78 | 79 | for i in range(len(initial_state_space)): 80 | assert initial_state_space[i][1] >= initial_state_space[i][0], "In initial state space, upper bound must be >= lower bound" 81 | 82 | for i in range(len(subgoal_bounds)): 83 | assert subgoal_bounds[i][1] >= subgoal_bounds[i][0], "In subgoal space, upper bound must be >= lower bound" 84 | 85 | # Make sure end goal spaces and thresholds have same first dimension 86 | if goal_space_train is not None and goal_space_test is not None: 87 | assert len(goal_space_train) == len(goal_space_test) == len(end_goal_thresholds), "End goal space and thresholds must have same first dimension" 88 | 89 | # Makde sure suboal spaces and thresholds have same dimensions 90 | assert len(subgoal_bounds) == len(subgoal_thresholds), "Subgoal space and thresholds must have same first dimension" 91 | 92 | # Ensure max action and timesteps_per_action are postive integers 93 | assert max_actions > 0, "Max actions should be a positive integer" 94 | 95 | assert timesteps_per_action > 0, "Timesteps per action should be a positive integer" 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical Actor-Critc (HAC) 2 | This repository contains the code to implement the *Hierarchical Actor-Critic (HAC)* algorithm. HAC helps agents learn tasks more quickly by enabling them to break problems down into short sequences of actions. 3 | 4 | To run HAC, execute the command *"python3 initialize_HAC.py --retrain"*. By default, this will train a UR5 agent with a 3-level hierarchy to learn to achieve certain poses. This UR5 agent should achieve a 90+% success rate in around 350 episodes. 
The following [video](https://www.youtube.com/watch?v=R86Vs9Vb6Bc) shows how a 3-layered agent performed after 450 episodes of training. In order to watch your trained agent, execute the command *"python3 initialize_HAC.py --test --show"*. Please note that in order to run this repository, you must have (i) a MuJoCo [license](https://www.roboti.us/license.html), (ii) the required MuJoCo software [libraries](https://www.roboti.us/index.html), and (iii) the MuJoCo Python [wrapper](https://github.com/openai/mujoco-py) from OpenAI. 5 | 6 | To run HAC with your own agents and MuJoCo environments, you need to complete the template in the *"design_agent_and_env.py"* file. The *"example_designs"* folder contains other examples of design templates that build different agents in the UR5 reacher and inverted pendulum environments. 7 | 8 | Happy to answer any questions you have. Please email me at andrew_levy2@brown.edu. 9 | 10 | ## UPDATE LOG 11 | 12 | ### 10/12/2018 - Key Changes 13 | 1. Bounded Q-Values 14 | 15 | The Q-values output by the critic network at each level are now bounded between *[-T,0]*, in which *T* is the max sequence length in which each policy specializes as well as the negative of the subgoal penalty. We use an upper bound of 0 because our code uses a nonpositive reward function. Consequently, Q-values should never be positive. However, we noticed that sometimes the critic function approximator would make small mistakes and assign positive Q-values, which occasionally proved harmful to results. In addition, we observed improved results when we used a tighter lower bound of *-T* (i.e., the subgoal penalty). The improved results may stem from the increased flexibility that the bounded Q-values provide the critic. The critic can assign a value of *-T* to any (state, action, goal) tuple in which the action does not bring the agent close to the goal, instead of having to learn the exact value. 16 | 17 | 2. Removed Target Networks 18 | 19 | We also noticed improved results when we used the regular Q-networks to determine the Bellman target updates (i.e., *reward + Q(next state,pi(next state),goal)*) instead of the separate target networks that are used in DDPG. The default setting of our code base thus no longer uses target networks. However, the target networks can be easily activated by making the changes specified in (i) the *"learn"* method in the *"layer.py"* file and (ii) the *"update"* method in the *"critic.py"* file. 20 | 21 | 3. Centralized Design Template 22 | 23 | Users can now configure the agent and environment in the single file, *"design_agent_and_env.py"*. This template file contains most of the significant hyperparameters in HAC. We have removed the command-line options that can change the architecture of the agent's hierarchy. 24 | 25 | 4. Added UR5 Reacher Environment 26 | 27 | We have added a new UR5 reacher environment, in which a UR5 agent can learn to achieve various poses. The *"ur5.xml"* MuJoCo file also contains commented code for a Robotiq gripper if you would like to augment the agent. Additional environments will hopefully be added shortly.
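To make the bounded-Q-value idea above concrete, here is a minimal, self-contained sketch of one way to squash a critic's raw output into the interval [-T, 0]. It is illustrative only and is not copied from this repository's critic.py; the names q_logits and time_scale are placeholders.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def bounded_q_output(q_logits, time_scale):
    # Squash the raw critic output to (0, 1) with a sigmoid, then scale by -T.
    # The result lies in (-T, 0): never positive (the reward function is
    # nonpositive) and never below the subgoal penalty -T.
    return -float(time_scale) * tf.sigmoid(q_logits)

# Example: q = bounded_q_output(last_layer, time_scale=10)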
28 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/actor.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | import numpy as np 4 | from utils import layer 5 | 6 | 7 | class Actor(): 8 | 9 | def __init__(self, 10 | sess, 11 | env, 12 | batch_size, 13 | layer_number, 14 | FLAGS, 15 | learning_rate=0.001, 16 | tau=0.05): 17 | 18 | self.sess = sess 19 | 20 | # Determine range of actor network outputs. This will be used to configure outer layer of neural network 21 | if layer_number == 0: 22 | self.action_space_bounds = env.action_bounds 23 | self.action_offset = env.action_offset 24 | else: 25 | # Determine symmetric range of subgoal space and offset 26 | self.action_space_bounds = env.subgoal_bounds_symmetric 27 | self.action_offset = env.subgoal_bounds_offset 28 | 29 | # Dimensions of action will depend on layer level 30 | if layer_number == 0: 31 | self.action_space_size = env.action_dim 32 | else: 33 | self.action_space_size = env.subgoal_dim 34 | 35 | self.actor_name = 'actor_' + str(layer_number) 36 | 37 | # Dimensions of goal placeholder will differ depending on layer level 38 | if layer_number == FLAGS.layers - 1: 39 | self.goal_dim = env.end_goal_dim 40 | else: 41 | self.goal_dim = env.subgoal_dim 42 | 43 | self.state_dim = env.state_dim 44 | 45 | self.learning_rate = learning_rate 46 | # self.exploration_policies = exploration_policies 47 | self.tau = tau 48 | # self.batch_size = batch_size 49 | self.batch_size = tf.placeholder(tf.float32) 50 | 51 | self.state_ph = tf.placeholder(tf.float32, shape=(None, self.state_dim)) 52 | self.goal_ph = tf.placeholder(tf.float32, shape=(None, self.goal_dim)) 53 | self.features_ph = tf.concat([self.state_ph, self.goal_ph], axis=1) 54 | 55 | # Create actor network 56 | self.infer = self.create_nn(self.features_ph) 57 | 58 | # Target network code "repurposed" from Patrick Emani :^) 59 | self.weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name] 60 | # self.num_weights = len(self.weights) 61 | 62 | # Create target actor network 63 | self.target = self.create_nn(self.features_ph, name = self.actor_name + '_target') 64 | self.target_weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name][len(self.weights):] 65 | 66 | self.update_target_weights = \ 67 | [self.target_weights[i].assign(tf.multiply(self.weights[i], self.tau) + 68 | tf.multiply(self.target_weights[i], 1. 
- self.tau)) 69 | for i in range(len(self.target_weights))] 70 | 71 | self.action_derivs = tf.placeholder(tf.float32, shape=(None, self.action_space_size)) 72 | self.unnormalized_actor_gradients = tf.gradients(self.infer, self.weights, -self.action_derivs) 73 | self.policy_gradient = list(map(lambda x: tf.div(x, self.batch_size), self.unnormalized_actor_gradients)) 74 | 75 | # self.policy_gradient = tf.gradients(self.infer, self.weights, -self.action_derivs) 76 | self.train = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(self.policy_gradient, self.weights)) 77 | 78 | 79 | def get_action(self, state, goal): 80 | actions = self.sess.run(self.infer, 81 | feed_dict={ 82 | self.state_ph: state, 83 | self.goal_ph: goal 84 | }) 85 | 86 | return actions 87 | 88 | def get_target_action(self, state, goal): 89 | actions = self.sess.run(self.target, 90 | feed_dict={ 91 | self.state_ph: state, 92 | self.goal_ph: goal 93 | }) 94 | 95 | return actions 96 | 97 | def update(self, state, goal, action_derivs, next_batch_size): 98 | weights, policy_grad, _ = self.sess.run([self.weights, self.policy_gradient, self.train], 99 | feed_dict={ 100 | self.state_ph: state, 101 | self.goal_ph: goal, 102 | self.action_derivs: action_derivs, 103 | self.batch_size: next_batch_size 104 | }) 105 | 106 | return len(weights) 107 | 108 | # self.sess.run(self.update_target_weights) 109 | 110 | # def create_nn(self, state, goal, name='actor'): 111 | def create_nn(self, features, name=None): 112 | 113 | if name is None: 114 | name = self.actor_name 115 | 116 | with tf.variable_scope(name + '_fc_1'): 117 | fc1 = layer(features, 64) 118 | with tf.variable_scope(name + '_fc_2'): 119 | fc2 = layer(fc1, 64) 120 | with tf.variable_scope(name + '_fc_3'): 121 | fc3 = layer(fc2, 64) 122 | with tf.variable_scope(name + '_fc_4'): 123 | fc4 = layer(fc3, self.action_space_size, is_output=True) 124 | 125 | output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset 126 | 127 | return output 128 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/experience_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class ExperienceBuffer(): 4 | 5 | def __init__(self, max_buffer_size, batch_size): 6 | self.size = 0 7 | self.max_buffer_size = max_buffer_size 8 | self.experiences = [] 9 | self.batch_size = batch_size 10 | 11 | def add(self, experience): 12 | assert len(experience) == 7, 'Experience must be of form (s, a, r, s, g, t, grip_info\')' 13 | assert type(experience[5]) == bool 14 | 15 | self.experiences.append(experience) 16 | self.size += 1 17 | 18 | # If replay buffer is filled, remove a percentage of replay buffer. 
Only removing a single transition slows down performance 19 | if self.size >= self.max_buffer_size: 20 | beg_index = int(np.floor(self.max_buffer_size/6)) 21 | self.experiences = self.experiences[beg_index:] 22 | self.size -= beg_index 23 | 24 | def get_batch(self): 25 | states, actions, rewards, new_states, goals, is_terminals = [], [], [], [], [], [] 26 | dist = np.random.randint(0, high=self.size, size=min(self.size, self.batch_size)) 27 | 28 | for i in dist: 29 | states.append(self.experiences[i][0]) 30 | actions.append(self.experiences[i][1]) 31 | rewards.append(self.experiences[i][2]) 32 | new_states.append(self.experiences[i][3]) 33 | goals.append(self.experiences[i][4]) 34 | is_terminals.append(self.experiences[i][5]) 35 | 36 | return states, actions, rewards, new_states, goals, is_terminals 37 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/initialize_HAC.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the starting file for the Hierarchical Actor-Critc (HAC) algorithm. The below script processes the command-line options specified 3 | by the user and instantiates the environment and agent. 4 | """ 5 | 6 | from design_agent_and_env import design_agent_and_env 7 | from options import parse_options 8 | from agent import Agent 9 | from run_HAC import run_HAC 10 | 11 | # Determine training options specified by user. The full list of available options can be found in "options.py" file. 12 | FLAGS = parse_options() 13 | 14 | # Instantiate the agent and Mujoco environment. The designer must assign values to the hyperparameters listed in the "design_agent_and_env.py" file. 15 | agent, env = design_agent_and_env(FLAGS) 16 | 17 | # Begin training 18 | run_HAC(FLAGS,env,agent) 19 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The dm_control Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================ 15 | 16 | """Functions to manage the common assets for domains.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | from dm_control.utils import resources 24 | 25 | _SUITE_DIR = os.path.dirname(os.path.dirname(__file__)) 26 | _FILENAMES = [ 27 | "common/materials.xml", 28 | "common/skybox.xml", 29 | "common/visual.xml", 30 | ] 31 | 32 | ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename)) 33 | for filename in _FILENAMES} 34 | 35 | 36 | def read_model(model_filename): 37 | """Reads a model XML file and returns its contents as a string.""" 38 | return resources.GetResource(os.path.join(_SUITE_DIR, model_filename)) 39 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/common/materials.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/common/skybox.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/common/visual.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_base.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_forearm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_forearm.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl 
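For reference, a short usage sketch of the ExperienceBuffer class listed a few entries above (experience_buffer.py). The dimensions below are made up; the real ones come from design_agent_and_env.py. Each stored experience is the 7-tuple (s, a, r, s', g, terminal, grip_info) that ExperienceBuffer.add asserts on.

import numpy as np
from experience_buffer import ExperienceBuffer

state_dim, action_dim, goal_dim = 5, 2, 3   # placeholder sizes

buffer = ExperienceBuffer(max_buffer_size=10000, batch_size=4)

# Add a handful of dummy transitions (HAC uses a nonpositive reward).
for _ in range(8):
    buffer.add((np.zeros(state_dim), np.zeros(action_dim), -1.0,
                np.zeros(state_dim), np.zeros(goal_dim), False, None))

# get_batch() samples indices uniformly at random and returns six parallel lists.
states, actions, rewards, new_states, goals, is_terminals = buffer.get_batch()
print(len(states))   # at most batch_size entries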
-------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_shoulder.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_shoulder.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_upperarm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_upperarm.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist1.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist2.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/c_wrist3.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup_2.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/glass_cup_3.stl 
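The Actor class listed earlier in this directory builds an update_target_weights op that mixes the online weights into the target weights with rate tau (0.05 by default), even though the default configuration no longer uses target networks. A plain-NumPy sketch of that Polyak averaging rule, purely for illustration:

import numpy as np

def soft_update(weights, target_weights, tau=0.05):
    # target <- tau * online + (1 - tau) * target, applied weight-by-weight,
    # mirroring update_target_weights in actor.py.
    return [tau * w + (1.0 - tau) * tw for w, tw in zip(weights, target_weights)]

online = [np.ones((3, 2)), np.zeros(2)]   # toy shapes
target = [np.zeros((3, 2)), np.ones(2)]
target = soft_update(online, target)      # target moves 5% toward the online weights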
-------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_finger_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_finger_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/new_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/new_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_finger_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_finger_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/red_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/red_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/upd_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/upd_solo_cup.stl -------------------------------------------------------------------------------- 
/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_base.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_forearm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_forearm.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_shoulder.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_shoulder.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_upperarm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_upperarm.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist1.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist2.stl 
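The actor update in actor.py follows the DDPG pattern: the critic supplies the gradient of Q with respect to the action (action_derivs), and the actor weights move along d(action)/d(weights) chained with that gradient, averaged over the batch. The standalone sketch below rebuilds just that piece with made-up layer sizes; it is not a drop-in replacement for the repository's Actor class.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

state_ph = tf.placeholder(tf.float32, shape=(None, 4))        # toy state size
action = tf.layers.dense(state_ph, 2, activation=tf.tanh)     # pi(s), toy action size
action_derivs = tf.placeholder(tf.float32, shape=(None, 2))   # dQ/da supplied by the critic
batch_size = tf.placeholder(tf.float32)

weights = tf.trainable_variables()
# The minus sign turns Adam's minimization into gradient ascent on Q.
grads = tf.gradients(action, weights, -action_derivs)
grads = [g / batch_size for g in grads]
train_op = tf.train.AdamOptimizer(0.001).apply_gradients(zip(grads, weights))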
-------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_four_rooms_3_levels/mujoco_files/muj_gripper/v_wrist3.stl -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/mujoco_files/pendulum.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | """ 4 | Below are training options user can specify in command line. 5 | 6 | Options Include: 7 | 8 | 1. Retrain boolean ("--retrain") 9 | - If included, actor and critic neural network parameters are reset 10 | 11 | 2. Testing boolean ("--test") 12 | - If included, agent only uses greedy policy without noise. No changes are made to policy and neural networks. 13 | - If not included, periods of training are by default interleaved with periods of testing to evaluate progress. 14 | 15 | 3. Show boolean ("--show") 16 | - If included, training will be visualized 17 | 18 | 4. Train Only boolean ("--train_only") 19 | - If included, agent will be solely in training mode and will not interleave periods of training and testing 20 | 21 | 5. Verbosity boolean ("--verbose") 22 | - If included, summary of each transition will be printed 23 | 24 | 6. All Trans boolean ("--all_trans") 25 | - If included, all transitions including (i) hindsight action, (ii) subgoal penalty, (iii) preliminary HER, and (iv) final HER transitions will be printed. Use below options to print out specific types of transitions. 26 | 27 | 7. Hindsight Action trans boolean ("hind_action") 28 | - If included, prints hindsight actions transitions for each level 29 | 30 | 8. Subgoal Penalty trans ("penalty") 31 | - If included, prints the subgoal penalty transitions 32 | 33 | 9. Preliminary HER trans ("prelim_HER") 34 | -If included, prints the preliminary HER transitions (i.e., with TBD reward and goal components) 35 | 36 | 10. HER trans ("HER") 37 | - If included, prints the final HER transitions for each level 38 | 39 | 11. 
Show Q-values ("--Q_values") 40 | - Show Q-values for each action by each level 41 | 42 | """ 43 | 44 | def parse_options(): 45 | parser = argparse.ArgumentParser() 46 | 47 | parser.add_argument( 48 | '--retrain', 49 | action='store_true', 50 | help='Include to reset policy' 51 | ) 52 | 53 | parser.add_argument( 54 | '--test', 55 | action='store_true', 56 | help='Include to fix current policy' 57 | ) 58 | 59 | parser.add_argument( 60 | '--show', 61 | action='store_true', 62 | help='Include to visualize training' 63 | ) 64 | 65 | parser.add_argument( 66 | '--train_only', 67 | action='store_true', 68 | help='Include to use training mode only' 69 | ) 70 | 71 | parser.add_argument( 72 | '--verbose', 73 | action='store_true', 74 | help='Print summary of each transition' 75 | ) 76 | 77 | parser.add_argument( 78 | '--all_trans', 79 | action='store_true', 80 | help='Print summary of each transition' 81 | ) 82 | 83 | parser.add_argument( 84 | '--hind_action', 85 | action='store_true', 86 | help='Print summary of each transition' 87 | ) 88 | 89 | parser.add_argument( 90 | '--penalty', 91 | action='store_true', 92 | help='Print summary of each transition' 93 | ) 94 | 95 | parser.add_argument( 96 | '--prelim_HER', 97 | action='store_true', 98 | help='Print summary of each transition' 99 | ) 100 | 101 | parser.add_argument( 102 | '--HER', 103 | action='store_true', 104 | help='Print summary of each transition' 105 | ) 106 | 107 | parser.add_argument( 108 | '--Q_values', 109 | action='store_true', 110 | help='Print summary of each transition' 111 | ) 112 | 113 | FLAGS, unparsed = parser.parse_known_args() 114 | 115 | 116 | return FLAGS 117 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/run_HAC.py: -------------------------------------------------------------------------------- 1 | """ 2 | "run_HAC.py" executes the training schedule for the agent. By default, the agent will alternate between exploration and testing phases. The number of episodes in the exploration phase can be configured in section 3 of "design_agent_and_env.py" file. If the user prefers to only explore or only test, the user can enter the command-line options ""--train_only" or "--test", respectively. The full list of command-line options is available in the "options.py" file. 3 | """ 4 | 5 | import pickle as cpickle 6 | import agent as Agent 7 | from utils import print_summary 8 | 9 | NUM_BATCH = 1000 10 | TEST_FREQ = 2 11 | 12 | num_test_episodes = 100 13 | 14 | def run_HAC(FLAGS,env,agent): 15 | 16 | # Print task summary 17 | print_summary(FLAGS,env) 18 | 19 | total_episodes = 0 20 | 21 | # Determine training mode. 
If not testing and not solely training, interleave training and testing to track progress 22 | mix_train_test = False 23 | if not FLAGS.test and not FLAGS.train_only: 24 | mix_train_test = True 25 | 26 | for batch in range(NUM_BATCH): 27 | 28 | num_episodes = agent.other_params["num_exploration_episodes"] 29 | 30 | # Evaluate policy every TEST_FREQ batches if interleaving training and testing 31 | if mix_train_test and batch % TEST_FREQ == 0: 32 | print("\n--- TESTING ---") 33 | agent.FLAGS.test = True 34 | num_episodes = num_test_episodes 35 | 36 | # Reset successful episode counter 37 | successful_episodes = 0 38 | 39 | for episode in range(num_episodes): 40 | 41 | print("\nBatch %d, Episode %d" % (batch, episode)) 42 | 43 | # Train for an episode 44 | success = agent.train(env, episode, total_episodes) 45 | 46 | if success: 47 | print("Batch %d, Episode %d End Goal Achieved\n" % (batch, episode)) 48 | 49 | # Increment successful episode counter if applicable 50 | if mix_train_test and batch % TEST_FREQ == 0: 51 | successful_episodes += 1 52 | 53 | if FLAGS.train_only or (mix_train_test and batch % TEST_FREQ != 0): 54 | total_episodes += 1 55 | 56 | # Save agent 57 | agent.save_model(episode) 58 | 59 | # Finish evaluating policy if tested prior batch 60 | if mix_train_test and batch % TEST_FREQ == 0: 61 | 62 | # Log performance 63 | success_rate = successful_episodes / num_test_episodes * 100 64 | print("\nTesting Success Rate %.2f%%" % success_rate) 65 | agent.log_performance(success_rate) 66 | agent.FLAGS.test = False 67 | 68 | print("\n--- END TESTING ---\n") 69 | -------------------------------------------------------------------------------- /ant_environments/ant_four_rooms_3_levels/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | 4 | def layer(input_layer, num_next_neurons, is_output=False): 5 | num_prev_neurons = int(input_layer.shape[1]) 6 | shape = [num_prev_neurons, num_next_neurons] 7 | 8 | if is_output: 9 | weight_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3) 10 | bias_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3) 11 | else: 12 | # 1/sqrt(f) 13 | fan_in_init = 1 / num_prev_neurons ** 0.5 14 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 15 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 16 | 17 | weights = tf.get_variable("weights", shape, initializer=weight_init) 18 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init) 19 | 20 | dot = tf.matmul(input_layer, weights) + biases 21 | 22 | if is_output: 23 | return dot 24 | 25 | relu = tf.nn.relu(dot) 26 | return relu 27 | 28 | def layer_goal_nn(input_layer, num_next_neurons, is_output=False): 29 | num_prev_neurons = int(input_layer.shape[1]) 30 | shape = [num_prev_neurons, num_next_neurons] 31 | 32 | 33 | fan_in_init = 1 / num_prev_neurons ** 0.5 34 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 35 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 36 | 37 | weights = tf.get_variable("weights", shape, initializer=weight_init) 38 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init) 39 | 40 | dot = tf.matmul(input_layer, weights) + biases 41 | 42 | if is_output: 43 | return dot 44 | 45 | relu = tf.nn.relu(dot) 46 | return relu 47 | 48 | 49 | # Below function prints out options and 
environment specified by user 50 | def print_summary(FLAGS,env): 51 | 52 | print("\n- - - - - - - - - - -") 53 | print("Task Summary: ","\n") 54 | print("Environment: ", env.name) 55 | print("Number of Layers: ", FLAGS.layers) 56 | print("Time Limit per Layer: ", FLAGS.time_scale) 57 | print("Max Episode Time Steps: ", env.max_actions) 58 | print("Retrain: ", FLAGS.retrain) 59 | print("Test: ", FLAGS.test) 60 | print("Visualize: ", FLAGS.show) 61 | print("- - - - - - - - - - -", "\n\n") 62 | 63 | 64 | # Below function ensures environment configurations were properly entered 65 | def check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds, initial_state_space, subgoal_bounds, subgoal_thresholds, max_actions, timesteps_per_action): 66 | 67 | # Ensure model file is an ".xml" file 68 | assert model_name[-4:] == ".xml", "Mujoco model must be an \".xml\" file" 69 | 70 | # Ensure upper bounds of range is >= lower bound of range 71 | if goal_space_train is not None: 72 | for i in range(len(goal_space_train)): 73 | assert goal_space_train[i][1] >= goal_space_train[i][0], "In the training goal space, upper bound must be >= lower bound" 74 | 75 | if goal_space_test is not None: 76 | for i in range(len(goal_space_test)): 77 | assert goal_space_test[i][1] >= goal_space_test[i][0], "In the training goal space, upper bound must be >= lower bound" 78 | 79 | for i in range(len(initial_state_space)): 80 | assert initial_state_space[i][1] >= initial_state_space[i][0], "In initial state space, upper bound must be >= lower bound" 81 | 82 | for i in range(len(subgoal_bounds)): 83 | assert subgoal_bounds[i][1] >= subgoal_bounds[i][0], "In subgoal space, upper bound must be >= lower bound" 84 | 85 | # Make sure end goal spaces and thresholds have same first dimension 86 | if goal_space_train is not None and goal_space_test is not None: 87 | assert len(goal_space_train) == len(goal_space_test) == len(end_goal_thresholds), "End goal space and thresholds must have same first dimension" 88 | 89 | # Makde sure suboal spaces and thresholds have same dimensions 90 | assert len(subgoal_bounds) == len(subgoal_thresholds), "Subgoal space and thresholds must have same first dimension" 91 | 92 | # Ensure max action and timesteps_per_action are postive integers 93 | assert max_actions > 0, "Max actions should be a positive integer" 94 | 95 | assert timesteps_per_action > 0, "Timesteps per action should be a positive integer" 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/.DS_Store -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical Actor-Critc (HAC) 2 | This repository contains the code to implement the *Hierarchical Actor-Critic (HAC)* algorithm. HAC helps agents learn tasks more quickly by enabling them to break problems down into short sequences of actions. 3 | 4 | To run HAC, execute the command *"python3 initialize_HAC.py --retrain"*. By default, this will train a UR5 agent with a 3-level hierarchy to learn to achieve certain poses. 
This UR5 agent should achieve a 90+% success rate in around 350 episodes. The following [video](https://www.youtube.com/watch?v=R86Vs9Vb6Bc) shows how a 3-layered agent performed after 450 episodes of training. In order to watch your trained agent, execute the command *"python3 initialize_HAC.py --test --show"*. Please note that in order to run this repository, you must have (i) a MuJoCo [license](https://www.roboti.us/license.html), (ii) the required MuJoCo software [libraries](https://www.roboti.us/index.html), and (iii) the MuJoCo Python [wrapper](https://github.com/openai/mujoco-py) from OpenAI. 5 | 6 | To run HAC with your own agents and MuJoCo environments, you need to complete the template in the *"design_agent_and_env.py"* file. The *"example_designs"* folder contains other examples of design templates that build different agents in the UR5 reacher and inverted pendulum environments. 7 | 8 | Happy to answer any questions you have. Please email me at andrew_levy2@brown.edu. 9 | 10 | ## UPDATE LOG 11 | 12 | ### 10/12/2018 - Key Changes 13 | 1. Bounded Q-Values 14 | 15 | The Q-values output by the critic network at each level are now bounded between *[-T,0]*, in which *T* is the max sequence length in which each policy specializes as well as the negative of the subgoal penalty. We use an upper bound of 0 because our code uses a nonpositive reward function. Consequently, Q-values should never be positive. However, we noticed that sometimes the critic function approximator would make small mistakes and assign positive Q-values, which occasionally proved harmful to results. In addition, we observed improved results when we used a tighter lower bound of *-T* (i.e., the subgoal penalty). The improved results may stem from the increased flexibility that the bounded Q-values provide the critic. The critic can assign a value of *-T* to any (state, action, goal) tuple in which the action does not bring the agent close to the goal, instead of having to learn the exact value. 16 | 17 | 2. Removed Target Networks 18 | 19 | We also noticed improved results when we used the regular Q-networks to determine the Bellman target updates (i.e., *reward + Q(next state,pi(next state),goal)*) instead of the separate target networks that are used in DDPG. The default setting of our code base thus no longer uses target networks. However, the target networks can be easily activated by making the changes specified in (i) the *"learn"* method in the *"layer.py"* file and (ii) the *"update"* method in the *"critic.py"* file. 20 | 21 | 3. Centralized Design Template 22 | 23 | Users can now configure the agent and environment in the single file, *"design_agent_and_env.py"*. This template file contains most of the significant hyperparameters in HAC. We have removed the command-line options that can change the architecture of the agent's hierarchy. 24 | 25 | 4. Added UR5 Reacher Environment 26 | 27 | We have added a new UR5 reacher environment, in which a UR5 agent can learn to achieve various poses. The *"ur5.xml"* MuJoCo file also contains commented code for a Robotiq gripper if you would like to augment the agent. Additional environments will hopefully be added shortly.
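As a companion to item 2 above, here is a minimal sketch of a Bellman target computed without a separate target network, bootstrapping from the same Q-network that is being trained. The arguments (q_network, policy, time_scale) are placeholders for the level's critic, actor, and max sequence length; the repository's actual update lives in layer.py and critic.py.

import numpy as np

def bellman_targets(rewards, next_states, goals, is_terminals,
                    q_network, policy, time_scale):
    # reward + Q(next state, pi(next state), goal), with no bootstrapping on
    # terminal transitions, clipped to the bounded range [-T, 0] from item 1.
    next_q = q_network(next_states, policy(next_states, goals), goals)
    targets = rewards + np.where(is_terminals, 0.0, next_q)
    return np.clip(targets, -float(time_scale), 0.0)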
28 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/__pycache__/actor.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/actor.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/__pycache__/agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/agent.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/__pycache__/critic.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/critic.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/__pycache__/design_agent_and_env.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/design_agent_and_env.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/__pycache__/environment.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/environment.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/__pycache__/experience_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/experience_buffer.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/__pycache__/layer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/layer.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/__pycache__/options.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/options.cpython-37.pyc 
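The layer() helper in utils.py (shown earlier for each environment directory) initializes hidden layers from a uniform distribution with range 1/sqrt(fan_in), and output layers from the tighter fixed range [-3e-3, 3e-3]. A small NumPy sketch of the hidden-layer case, for illustration:

import numpy as np

def fan_in_uniform(num_prev_neurons, num_next_neurons, rng=np.random):
    # Hidden layers in utils.layer() draw weights and biases uniformly from
    # [-1/sqrt(f), 1/sqrt(f)], where f is the number of incoming units.
    limit = 1.0 / np.sqrt(num_prev_neurons)
    weights = rng.uniform(-limit, limit, size=(num_prev_neurons, num_next_neurons))
    biases = rng.uniform(-limit, limit, size=num_next_neurons)
    return weights, biases

w, b = fan_in_uniform(64, 64)   # same hidden width the actor networks use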
-------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/__pycache__/run_HAC.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/run_HAC.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/actor.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | import numpy as np 4 | from utils import layer 5 | 6 | 7 | class Actor(): 8 | 9 | def __init__(self, 10 | sess, 11 | env, 12 | batch_size, 13 | layer_number, 14 | FLAGS, 15 | learning_rate=0.001, 16 | tau=0.05): 17 | 18 | self.sess = sess 19 | 20 | # Determine range of actor network outputs. This will be used to configure outer layer of neural network 21 | if layer_number == 0: 22 | self.action_space_bounds = env.action_bounds 23 | self.action_offset = env.action_offset 24 | else: 25 | # Determine symmetric range of subgoal space and offset 26 | self.action_space_bounds = env.subgoal_bounds_symmetric 27 | self.action_offset = env.subgoal_bounds_offset 28 | 29 | # Dimensions of action will depend on layer level 30 | if layer_number == 0: 31 | self.action_space_size = env.action_dim 32 | else: 33 | self.action_space_size = env.subgoal_dim 34 | 35 | self.actor_name = 'actor_' + str(layer_number) 36 | 37 | # Dimensions of goal placeholder will differ depending on layer level 38 | if layer_number == FLAGS.layers - 1: 39 | self.goal_dim = env.end_goal_dim 40 | else: 41 | self.goal_dim = env.subgoal_dim 42 | 43 | self.state_dim = env.state_dim 44 | 45 | self.learning_rate = learning_rate 46 | # self.exploration_policies = exploration_policies 47 | self.tau = tau 48 | # self.batch_size = batch_size 49 | self.batch_size = tf.placeholder(tf.float32) 50 | 51 | self.state_ph = tf.placeholder(tf.float32, shape=(None, self.state_dim)) 52 | self.goal_ph = tf.placeholder(tf.float32, shape=(None, self.goal_dim)) 53 | self.features_ph = tf.concat([self.state_ph, self.goal_ph], axis=1) 54 | 55 | # Create actor network 56 | self.infer = self.create_nn(self.features_ph) 57 | 58 | # Target network code "repurposed" from Patrick Emani :^) 59 | self.weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name] 60 | # self.num_weights = len(self.weights) 61 | 62 | # Create target actor network 63 | self.target = self.create_nn(self.features_ph, name = self.actor_name + '_target') 64 | self.target_weights = [v for v in tf.trainable_variables() if self.actor_name in v.op.name][len(self.weights):] 65 | 66 | self.update_target_weights = \ 67 | [self.target_weights[i].assign(tf.multiply(self.weights[i], self.tau) + 68 | tf.multiply(self.target_weights[i], 1. 
- self.tau)) 69 | for i in range(len(self.target_weights))] 70 | 71 | self.action_derivs = tf.placeholder(tf.float32, shape=(None, self.action_space_size)) 72 | self.unnormalized_actor_gradients = tf.gradients(self.infer, self.weights, -self.action_derivs) 73 | self.policy_gradient = list(map(lambda x: tf.div(x, self.batch_size), self.unnormalized_actor_gradients)) 74 | 75 | # self.policy_gradient = tf.gradients(self.infer, self.weights, -self.action_derivs) 76 | self.train = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(self.policy_gradient, self.weights)) 77 | 78 | 79 | def get_action(self, state, goal): 80 | actions = self.sess.run(self.infer, 81 | feed_dict={ 82 | self.state_ph: state, 83 | self.goal_ph: goal 84 | }) 85 | 86 | return actions 87 | 88 | def get_target_action(self, state, goal): 89 | actions = self.sess.run(self.target, 90 | feed_dict={ 91 | self.state_ph: state, 92 | self.goal_ph: goal 93 | }) 94 | 95 | return actions 96 | 97 | def update(self, state, goal, action_derivs, next_batch_size): 98 | weights, policy_grad, _ = self.sess.run([self.weights, self.policy_gradient, self.train], 99 | feed_dict={ 100 | self.state_ph: state, 101 | self.goal_ph: goal, 102 | self.action_derivs: action_derivs, 103 | self.batch_size: next_batch_size 104 | }) 105 | 106 | return len(weights) 107 | 108 | # self.sess.run(self.update_target_weights) 109 | 110 | # def create_nn(self, state, goal, name='actor'): 111 | def create_nn(self, features, name=None): 112 | 113 | if name is None: 114 | name = self.actor_name 115 | 116 | with tf.variable_scope(name + '_fc_1'): 117 | fc1 = layer(features, 64) 118 | with tf.variable_scope(name + '_fc_2'): 119 | fc2 = layer(fc1, 64) 120 | with tf.variable_scope(name + '_fc_3'): 121 | fc3 = layer(fc2, 64) 122 | with tf.variable_scope(name + '_fc_4'): 123 | fc4 = layer(fc3, self.action_space_size, is_output=True) 124 | 125 | output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset 126 | 127 | return output 128 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/experience_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class ExperienceBuffer(): 4 | 5 | def __init__(self, max_buffer_size, batch_size): 6 | self.size = 0 7 | self.max_buffer_size = max_buffer_size 8 | self.experiences = [] 9 | self.batch_size = batch_size 10 | 11 | def add(self, experience): 12 | assert len(experience) == 7, 'Experience must be of form (s, a, r, s, g, t, grip_info\')' 13 | assert type(experience[5]) == bool 14 | 15 | self.experiences.append(experience) 16 | self.size += 1 17 | 18 | # If replay buffer is filled, remove a percentage of replay buffer. 
Only removing a single transition slows down performance 19 | if self.size >= self.max_buffer_size: 20 | beg_index = int(np.floor(self.max_buffer_size/6)) 21 | self.experiences = self.experiences[beg_index:] 22 | self.size -= beg_index 23 | 24 | def get_batch(self): 25 | states, actions, rewards, new_states, goals, is_terminals = [], [], [], [], [], [] 26 | dist = np.random.randint(0, high=self.size, size=min(self.size, self.batch_size)) 27 | 28 | for i in dist: 29 | states.append(self.experiences[i][0]) 30 | actions.append(self.experiences[i][1]) 31 | rewards.append(self.experiences[i][2]) 32 | new_states.append(self.experiences[i][3]) 33 | goals.append(self.experiences[i][4]) 34 | is_terminals.append(self.experiences[i][5]) 35 | 36 | return states, actions, rewards, new_states, goals, is_terminals 37 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/initialize_HAC.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the starting file for the Hierarchical Actor-Critc (HAC) algorithm. The below script processes the command-line options specified 3 | by the user and instantiates the environment and agent. 4 | """ 5 | 6 | from design_agent_and_env import design_agent_and_env 7 | from options import parse_options 8 | from agent import Agent 9 | from run_HAC import run_HAC 10 | 11 | # Determine training options specified by user. The full list of available options can be found in "options.py" file. 12 | FLAGS = parse_options() 13 | 14 | # Instantiate the agent and Mujoco environment. The designer must assign values to the hyperparameters listed in the "design_agent_and_env.py" file. 15 | agent, env = design_agent_and_env(FLAGS) 16 | 17 | # Begin training 18 | run_HAC(FLAGS,env,agent) 19 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.data-00000-of-00001 -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.index -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99.meta -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/models/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "/Users/andrewlevy/Documents/GitHub/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99" 2 | all_model_checkpoint_paths: 
"/Users/andrewlevy/Documents/GitHub/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_reacher_2_levels/models/HAC.ckpt-99" 3 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The dm_control Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """Functions to manage the common assets for domains.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | from dm_control.utils import resources 24 | 25 | _SUITE_DIR = os.path.dirname(os.path.dirname(__file__)) 26 | _FILENAMES = [ 27 | "common/materials.xml", 28 | "common/skybox.xml", 29 | "common/visual.xml", 30 | ] 31 | 32 | ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename)) 33 | for filename in _FILENAMES} 34 | 35 | 36 | def read_model(model_filename): 37 | """Reads a model XML file and returns its contents as a string.""" 38 | return resources.GetResource(os.path.join(_SUITE_DIR, model_filename)) 39 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/common/materials.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/common/skybox.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/common/visual.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_base.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_forearm.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_forearm.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_shoulder.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_shoulder.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_upperarm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_upperarm.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist1.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist2.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/c_wrist3.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup_2.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/glass_cup_3.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_finger_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_finger_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/new_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/new_solo_cup.stl -------------------------------------------------------------------------------- 
/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_finger_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_finger_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/red_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/red_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/upd_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/upd_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_base.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_forearm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_forearm.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_shoulder.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_shoulder.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_upperarm.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_upperarm.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist1.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist2.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_2_levels/mujoco_files/muj_gripper/v_wrist3.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/mujoco_files/pendulum.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | """ 4 | Below are training options user can specify in command line. 5 | 6 | Options Include: 7 | 8 | 1. Retrain boolean ("--retrain") 9 | - If included, actor and critic neural network parameters are reset 10 | 11 | 2. Testing boolean ("--test") 12 | - If included, agent only uses greedy policy without noise. No changes are made to policy and neural networks. 13 | - If not included, periods of training are by default interleaved with periods of testing to evaluate progress. 14 | 15 | 3. Show boolean ("--show") 16 | - If included, training will be visualized 17 | 18 | 4. Train Only boolean ("--train_only") 19 | - If included, agent will be solely in training mode and will not interleave periods of training and testing 20 | 21 | 5. Verbosity boolean ("--verbose") 22 | - If included, summary of each transition will be printed 23 | 24 | 6. All Trans boolean ("--all_trans") 25 | - If included, all transitions including (i) hindsight action, (ii) subgoal penalty, (iii) preliminary HER, and (iv) final HER transitions will be printed. 
Use the options below to print out specific types of transitions. 26 | 27 | 7. Hindsight Action trans boolean ("--hind_action") 28 | - If included, prints hindsight action transitions for each level 29 | 30 | 8. Subgoal Penalty trans ("--penalty") 31 | - If included, prints the subgoal penalty transitions 32 | 33 | 9. Preliminary HER trans ("--prelim_HER") 34 | - If included, prints the preliminary HER transitions (i.e., with TBD reward and goal components) 35 | 36 | 10. HER trans ("--HER") 37 | - If included, prints the final HER transitions for each level 38 | 39 | 11. Show Q-values ("--Q_values") 40 | - If included, shows the Q-values for each action at each level 41 | 42 | """ 43 | 44 | def parse_options(): 45 | parser = argparse.ArgumentParser() 46 | 47 | parser.add_argument( 48 | '--retrain', 49 | action='store_true', 50 | help='Include to reset policy' 51 | ) 52 | 53 | parser.add_argument( 54 | '--test', 55 | action='store_true', 56 | help='Include to fix current policy' 57 | ) 58 | 59 | parser.add_argument( 60 | '--show', 61 | action='store_true', 62 | help='Include to visualize training' 63 | ) 64 | 65 | parser.add_argument( 66 | '--train_only', 67 | action='store_true', 68 | help='Include to use training mode only' 69 | ) 70 | 71 | parser.add_argument( 72 | '--verbose', 73 | action='store_true', 74 | help='Print summary of each transition' 75 | ) 76 | 77 | parser.add_argument( 78 | '--all_trans', 79 | action='store_true', 80 | help='Print all transitions, including hindsight action, subgoal penalty, and HER transitions' 81 | ) 82 | 83 | parser.add_argument( 84 | '--hind_action', 85 | action='store_true', 86 | help='Print hindsight action transitions for each level' 87 | ) 88 | 89 | parser.add_argument( 90 | '--penalty', 91 | action='store_true', 92 | help='Print subgoal penalty transitions' 93 | ) 94 | 95 | parser.add_argument( 96 | '--prelim_HER', 97 | action='store_true', 98 | help='Print preliminary HER transitions' 99 | ) 100 | 101 | parser.add_argument( 102 | '--HER', 103 | action='store_true', 104 | help='Print final HER transitions for each level' 105 | ) 106 | 107 | parser.add_argument( 108 | '--Q_values', 109 | action='store_true', 110 | help='Print Q-values for each action at each level' 111 | ) 112 | 113 | FLAGS, unparsed = parser.parse_known_args() 114 | 115 | 116 | return FLAGS 117 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/run_HAC.py: -------------------------------------------------------------------------------- 1 | """ 2 | "run_HAC.py" executes the training schedule for the agent. By default, the agent will alternate between exploration and testing phases. The number of episodes in the exploration phase can be configured in section 3 of the "design_agent_and_env.py" file. If the user prefers to only explore or only test, the user can enter the command-line option "--train_only" or "--test", respectively. The full list of command-line options is available in the "options.py" file. 3 | """ 4 | 5 | import pickle as cpickle 6 | import agent as Agent 7 | from utils import print_summary 8 | 9 | NUM_BATCH = 1000 10 | TEST_FREQ = 2 11 | 12 | num_test_episodes = 100 13 | 14 | def run_HAC(FLAGS,env,agent): 15 | 16 | # Print task summary 17 | print_summary(FLAGS,env) 18 | 19 | # Determine training mode.
If not testing and not solely training, interleave training and testing to track progress 20 | mix_train_test = False 21 | if not FLAGS.test and not FLAGS.train_only: 22 | mix_train_test = True 23 | 24 | # Track total training episodes completed 25 | total_episodes = 0 26 | 27 | for batch in range(NUM_BATCH): 28 | 29 | num_episodes = agent.other_params["num_exploration_episodes"] 30 | 31 | # Evaluate policy every TEST_FREQ batches if interleaving training and testing 32 | if mix_train_test and batch % TEST_FREQ == 0: 33 | print("\n--- TESTING ---") 34 | agent.FLAGS.test = True 35 | num_episodes = num_test_episodes 36 | 37 | # Reset successful episode counter 38 | successful_episodes = 0 39 | 40 | for episode in range(num_episodes): 41 | 42 | print("\nBatch %d, Episode %d" % (batch, episode)) 43 | 44 | # Train for an episode 45 | success = agent.train(env, episode, total_episodes) 46 | 47 | if FLAGS.train_only or (mix_train_test and batch % TEST_FREQ != 0): 48 | total_episodes += 1 49 | 50 | if success: 51 | print("Batch %d, Episode %d End Goal Achieved\n" % (batch, episode)) 52 | 53 | # Increment successful episode counter if applicable 54 | if mix_train_test and batch % TEST_FREQ == 0: 55 | successful_episodes += 1 56 | 57 | # Save agent 58 | agent.save_model(episode) 59 | 60 | # Finish evaluating policy if tested prior batch 61 | if mix_train_test and batch % TEST_FREQ == 0: 62 | 63 | # Log performance 64 | success_rate = successful_episodes / num_test_episodes * 100 65 | print("\nTesting Success Rate %.2f%%" % success_rate) 66 | agent.log_performance(success_rate) 67 | agent.FLAGS.test = False 68 | 69 | print("\n--- END TESTING ---\n") 70 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_2_levels/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | 4 | def layer(input_layer, num_next_neurons, is_output=False): 5 | num_prev_neurons = int(input_layer.shape[1]) 6 | shape = [num_prev_neurons, num_next_neurons] 7 | 8 | if is_output: 9 | weight_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3) 10 | bias_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3) 11 | else: 12 | # 1/sqrt(f) 13 | fan_in_init = 1 / num_prev_neurons ** 0.5 14 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 15 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 16 | 17 | weights = tf.get_variable("weights", shape, initializer=weight_init) 18 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init) 19 | 20 | dot = tf.matmul(input_layer, weights) + biases 21 | 22 | if is_output: 23 | return dot 24 | 25 | relu = tf.nn.relu(dot) 26 | return relu 27 | 28 | def layer_goal_nn(input_layer, num_next_neurons, is_output=False): 29 | num_prev_neurons = int(input_layer.shape[1]) 30 | shape = [num_prev_neurons, num_next_neurons] 31 | 32 | 33 | fan_in_init = 1 / num_prev_neurons ** 0.5 34 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 35 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 36 | 37 | weights = tf.get_variable("weights", shape, initializer=weight_init) 38 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init) 39 | 40 | dot = tf.matmul(input_layer, weights) + biases 41 | 42 | if is_output: 43 | return dot 44 | 45 | relu = tf.nn.relu(dot) 46 
| return relu 47 | 48 | 49 | # Below function prints out the options and environment specified by the user 50 | def print_summary(FLAGS,env): 51 | 52 | print("\n- - - - - - - - - - -") 53 | print("Task Summary: ","\n") 54 | print("Environment: ", env.name) 55 | print("Number of Layers: ", FLAGS.layers) 56 | print("Time Limit per Layer: ", FLAGS.time_scale) 57 | print("Max Episode Time Steps: ", env.max_actions) 58 | print("Retrain: ", FLAGS.retrain) 59 | print("Test: ", FLAGS.test) 60 | print("Visualize: ", FLAGS.show) 61 | print("- - - - - - - - - - -", "\n\n") 62 | 63 | 64 | # Below function ensures environment configurations were properly entered 65 | def check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds, initial_state_space, subgoal_bounds, subgoal_thresholds, max_actions, timesteps_per_action): 66 | 67 | # Ensure model file is an ".xml" file 68 | assert model_name[-4:] == ".xml", "Mujoco model must be an \".xml\" file" 69 | 70 | # Ensure the upper bound of each range is >= the lower bound 71 | if goal_space_train is not None: 72 | for i in range(len(goal_space_train)): 73 | assert goal_space_train[i][1] >= goal_space_train[i][0], "In the training goal space, upper bound must be >= lower bound" 74 | 75 | if goal_space_test is not None: 76 | for i in range(len(goal_space_test)): 77 | assert goal_space_test[i][1] >= goal_space_test[i][0], "In the test goal space, upper bound must be >= lower bound" 78 | 79 | for i in range(len(initial_state_space)): 80 | assert initial_state_space[i][1] >= initial_state_space[i][0], "In initial state space, upper bound must be >= lower bound" 81 | 82 | for i in range(len(subgoal_bounds)): 83 | assert subgoal_bounds[i][1] >= subgoal_bounds[i][0], "In subgoal space, upper bound must be >= lower bound" 84 | 85 | # Make sure end goal spaces and thresholds have same first dimension 86 | if goal_space_train is not None and goal_space_test is not None: 87 | assert len(goal_space_train) == len(goal_space_test) == len(end_goal_thresholds), "End goal space and thresholds must have same first dimension" 88 | 89 | # Make sure subgoal spaces and thresholds have same dimensions 90 | assert len(subgoal_bounds) == len(subgoal_thresholds), "Subgoal space and thresholds must have same first dimension" 91 | 92 | # Ensure max_actions and timesteps_per_action are positive integers 93 | assert max_actions > 0, "Max actions should be a positive integer" 94 | 95 | assert timesteps_per_action > 0, "Timesteps per action should be a positive integer" 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical Actor-Critic (HAC) 2 | This repository contains the code to implement the *Hierarchical Actor-Critic (HAC)* algorithm. HAC helps agents learn tasks more quickly by enabling them to break problems down into short sequences of actions. 3 | 4 | To run HAC, execute the command *"python3 initialize_HAC.py --retrain"*. By default, this will train a UR5 agent with a 3-level hierarchy to learn to achieve certain poses. This UR5 agent should achieve a 90+% success rate in around 350 episodes. The following [video](https://www.youtube.com/watch?v=R86Vs9Vb6Bc) shows how a 3-layered agent performed after 450 episodes of training. In order to watch your trained agent, execute the command *"python3 initialize_HAC.py --test --show"*.
Please note that in order to run this repository, you must have (i) a MuJoCo [license](https://www.roboti.us/license.html), (ii) the required MuJoCo software [libraries](https://www.roboti.us/index.html), and (iii) the MuJoCo Python [wrapper](https://github.com/openai/mujoco-py) from OpenAI. 5 | 6 | To run HAC with your own agents and MuJoCo environments, you need to complete the template in the *"design_agent_and_env.py"* file. The *"example_designs"* folder contains other examples of design templates that build different agents in the UR5 reacher and inverted pendulum environments. 7 | 8 | Happy to answer any questions you have. Please email me at andrew_levy2@brown.edu. 9 | 10 | ## UPDATE LOG 11 | 12 | ### 10/12/2018 - Key Changes 13 | 1. Bounded Q-Values 14 | 15 | The Q-values output by the critic network at each level are now bounded between *[-T,0]*, where *T* is the maximum sequence length in which each policy specializes (and also the magnitude of the subgoal penalty). We use an upper bound of 0 because our code uses a nonpositive reward function, so Q-values should never be positive. However, we noticed that sometimes the critic function approximator would make small mistakes and assign positive Q-values, which occasionally proved harmful to results. In addition, we observed improved results when we used a tighter lower bound of *-T* (i.e., the subgoal penalty). The improved results may stem from the increased flexibility that the bounded Q-values provide the critic. The critic can assign a value of *-T* to any (state, action, goal) tuple in which the action does not bring the agent close to the goal, instead of having to learn the exact value. 16 | 17 | 2. Removed Target Networks 18 | 19 | We also noticed improved results when we used the regular Q-networks to determine the Bellman target updates (i.e., *reward + Q(next state, pi(next state), goal)*) instead of the separate target networks that are used in DDPG. The default setting of our code base thus no longer uses target networks. However, the target networks can be easily activated by making the changes specified in (i) the *"learn"* method in the *"layer.py"* file and (ii) the *"update"* method in the *"critic.py"* file. A short illustrative sketch of both of these changes appears at the end of this README. 20 | 21 | 3. Centralized Design Template 22 | 23 | Users can now configure the agent and environment in the single file, *"design_agent_and_env.py"*. This template file contains most of the significant hyperparameters in HAC. We have removed the command-line options that could change the architecture of the agent's hierarchy. 24 | 25 | 4. Added UR5 Reacher Environment 26 | 27 | We have added a new UR5 reacher environment, in which a UR5 agent can learn to achieve various poses. The *"ur5.xml"* MuJoCo file also contains commented code for a Robotiq gripper if you would like to augment the agent. Additional environments will hopefully be added shortly.
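
The snippet below is only a minimal illustrative sketch of the two ideas described in items 1 and 2 above (bounded Q-values and Bellman targets computed with the regular critic); it is not code from "critic.py" or "layer.py", and the names `T`, `gamma`, `bound_q`, and `bellman_target` are placeholders chosen for this example. The actual repository may realize the bound differently inside the critic network.

```python
# Illustrative sketch only, assuming a nonpositive reward function (-1 per step, 0 at goal).
import numpy as np

T = 10        # hypothetical max sequence length per level (also the subgoal penalty magnitude)
gamma = 1.0   # hypothetical discount factor

def bound_q(q_raw):
    # Bounded Q-values: keep the critic's estimate inside [-T, 0], so it can never be
    # positive and never fall below the subgoal penalty.
    return float(np.clip(q_raw, -T, 0.0))

def bellman_target(reward, q_next, is_terminal):
    # No separate target network: the bootstrap term uses the regular critic's estimate
    # of Q(next state, pi(next state), goal), and the target is clipped to the same range.
    target = reward if is_terminal else reward + gamma * q_next
    return float(np.clip(target, -T, 0.0))

print(bound_q(2.3))                       # -> 0.0 (a mistakenly positive estimate is cut off)
print(bellman_target(-1.0, -4.0, False))  # -> -5.0
```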
28 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/__pycache__/actor.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/actor.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/__pycache__/agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/agent.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/__pycache__/critic.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/critic.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/__pycache__/design_agent_and_env.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/design_agent_and_env.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/__pycache__/environment.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/environment.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/__pycache__/experience_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/experience_buffer.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/__pycache__/layer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/layer.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/__pycache__/options.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/options.cpython-37.pyc 
-------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/__pycache__/run_HAC.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/run_HAC.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/experience_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class ExperienceBuffer(): 4 | 5 | def __init__(self, max_buffer_size, batch_size): 6 | self.size = 0 7 | self.max_buffer_size = max_buffer_size 8 | self.experiences = [] 9 | self.batch_size = batch_size 10 | 11 | def add(self, experience): 12 | assert len(experience) == 7, 'Experience must be of form (s, a, r, s, g, t, grip_info\')' 13 | assert type(experience[5]) == bool 14 | 15 | self.experiences.append(experience) 16 | self.size += 1 17 | 18 | # If replay buffer is filled, remove a percentage of replay buffer. Only removing a single transition slows down performance 19 | if self.size >= self.max_buffer_size: 20 | beg_index = int(np.floor(self.max_buffer_size/6)) 21 | self.experiences = self.experiences[beg_index:] 22 | self.size -= beg_index 23 | 24 | def get_batch(self): 25 | states, actions, rewards, new_states, goals, is_terminals = [], [], [], [], [], [] 26 | dist = np.random.randint(0, high=self.size, size=min(self.size, self.batch_size)) 27 | 28 | for i in dist: 29 | states.append(self.experiences[i][0]) 30 | actions.append(self.experiences[i][1]) 31 | rewards.append(self.experiences[i][2]) 32 | new_states.append(self.experiences[i][3]) 33 | goals.append(self.experiences[i][4]) 34 | is_terminals.append(self.experiences[i][5]) 35 | 36 | return states, actions, rewards, new_states, goals, is_terminals 37 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/initialize_HAC.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the starting file for the Hierarchical Actor-Critc (HAC) algorithm. The below script processes the command-line options specified 3 | by the user and instantiates the environment and agent. 4 | """ 5 | 6 | from design_agent_and_env import design_agent_and_env 7 | from options import parse_options 8 | from agent import Agent 9 | from run_HAC import run_HAC 10 | 11 | # Determine training options specified by user. The full list of available options can be found in "options.py" file. 12 | FLAGS = parse_options() 13 | 14 | # Instantiate the agent and Mujoco environment. The designer must assign values to the hyperparameters listed in the "design_agent_and_env.py" file. 
15 | agent, env = design_agent_and_env(FLAGS) 16 | 17 | # Begin training 18 | run_HAC(FLAGS,env,agent) 19 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.data-00000-of-00001 -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.index -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99.meta -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/models/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "/Users/andrewlevy/Documents/GitHub/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99" 2 | all_model_checkpoint_paths: "/Users/andrewlevy/Documents/GitHub/Hierarchical-Actor-Critc-HAC-/ant_environments/ant_reacher_3_levels/models/HAC.ckpt-99" 3 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The dm_control Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================ 15 | 16 | """Functions to manage the common assets for domains.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | from dm_control.utils import resources 24 | 25 | _SUITE_DIR = os.path.dirname(os.path.dirname(__file__)) 26 | _FILENAMES = [ 27 | "common/materials.xml", 28 | "common/skybox.xml", 29 | "common/visual.xml", 30 | ] 31 | 32 | ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename)) 33 | for filename in _FILENAMES} 34 | 35 | 36 | def read_model(model_filename): 37 | """Reads a model XML file and returns its contents as a string.""" 38 | return resources.GetResource(os.path.join(_SUITE_DIR, model_filename)) 39 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/common/materials.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/common/skybox.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/common/visual.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_base.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_forearm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_forearm.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl 
-------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_shoulder.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_shoulder.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_upperarm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_upperarm.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist1.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist2.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/c_wrist3.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup_2.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/glass_cup_3.stl 
-------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_finger_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_finger_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_finger_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_knuckle_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/inner_knuckle_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/new_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/new_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_finger_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_finger_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_finger_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_knuckle_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/outer_knuckle_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/red_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/red_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/smaller_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/upd_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/upd_solo_cup.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_base.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_base.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_forearm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_forearm.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_shoulder.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_shoulder.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_upperarm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_upperarm.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist1.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist2.stl -------------------------------------------------------------------------------- 
/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/ant_environments/ant_reacher_3_levels/mujoco_files/muj_gripper/v_wrist3.stl -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/mujoco_files/pendulum.xml: -------------------------------------------------------------------------------- [86-line MuJoCo XML model of the pendulum task; the markup was lost during extraction and is not reproduced here] -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | """ 4 | Below are the training options the user can specify on the command line. 5 | 6 | Options Include: 7 | 8 | 1. Retrain boolean ("--retrain") 9 | - If included, actor and critic neural network parameters are reset 10 | 11 | 2. Testing boolean ("--test") 12 | - If included, agent only uses greedy policy without noise. No changes are made to policy and neural networks. 13 | - If not included, periods of training are by default interleaved with periods of testing to evaluate progress. 14 | 15 | 3. Show boolean ("--show") 16 | - If included, training will be visualized 17 | 18 | 4. Train Only boolean ("--train_only") 19 | - If included, agent will be solely in training mode and will not interleave periods of training and testing 20 | 21 | 5. Verbosity boolean ("--verbose") 22 | - If included, a summary of each transition will be printed 23 | 24 | 6. All Trans boolean ("--all_trans") 25 | - If included, all transitions including (i) hindsight action, (ii) subgoal penalty, (iii) preliminary HER, and (iv) final HER transitions will be printed. Use the options below to print out specific types of transitions. 26 | 27 | 7. Hindsight Action trans boolean ("--hind_action") 28 | - If included, prints hindsight action transitions for each level 29 | 30 | 8. Subgoal Penalty trans ("--penalty") 31 | - If included, prints the subgoal penalty transitions 32 | 33 | 9. Preliminary HER trans ("--prelim_HER") 34 | - If included, prints the preliminary HER transitions (i.e., with TBD reward and goal components) 35 | 36 | 10. HER trans ("--HER") 37 | - If included, prints the final HER transitions for each level 38 | 39 | 11. Show Q-values ("--Q_values")
40 | - Show Q-values for each action by each level 41 | 42 | """ 43 | 44 | def parse_options(): 45 | parser = argparse.ArgumentParser() 46 | 47 | parser.add_argument( 48 | '--retrain', 49 | action='store_true', 50 | help='Include to reset policy' 51 | ) 52 | 53 | parser.add_argument( 54 | '--test', 55 | action='store_true', 56 | help='Include to fix current policy' 57 | ) 58 | 59 | parser.add_argument( 60 | '--show', 61 | action='store_true', 62 | help='Include to visualize training' 63 | ) 64 | 65 | parser.add_argument( 66 | '--train_only', 67 | action='store_true', 68 | help='Include to use training mode only' 69 | ) 70 | 71 | parser.add_argument( 72 | '--verbose', 73 | action='store_true', 74 | help='Print summary of each transition' 75 | ) 76 | 77 | parser.add_argument( 78 | '--all_trans', 79 | action='store_true', 80 | help='Print all transition types (hindsight action, subgoal penalty, preliminary HER, and HER)' 81 | ) 82 | 83 | parser.add_argument( 84 | '--hind_action', 85 | action='store_true', 86 | help='Print hindsight action transitions for each level' 87 | ) 88 | 89 | parser.add_argument( 90 | '--penalty', 91 | action='store_true', 92 | help='Print subgoal penalty transitions' 93 | ) 94 | 95 | parser.add_argument( 96 | '--prelim_HER', 97 | action='store_true', 98 | help='Print preliminary HER transitions' 99 | ) 100 | 101 | parser.add_argument( 102 | '--HER', 103 | action='store_true', 104 | help='Print final HER transitions for each level' 105 | ) 106 | 107 | parser.add_argument( 108 | '--Q_values', 109 | action='store_true', 110 | help='Print Q-values for each action by each level' 111 | ) 112 | 113 | FLAGS, unparsed = parser.parse_known_args() 114 | 115 | 116 | return FLAGS 117 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/run_HAC.py: -------------------------------------------------------------------------------- 1 | """ 2 | "run_HAC.py" executes the training schedule for the agent. By default, the agent will alternate between exploration and testing phases. The number of episodes in the exploration phase can be configured in section 3 of the "design_agent_and_env.py" file. If the user prefers to only explore or only test, the user can enter the command-line options "--train_only" or "--test", respectively. The full list of command-line options is available in the "options.py" file. 3 | """ 4 | 5 | import pickle as cpickle 6 | import agent as Agent 7 | from utils import print_summary 8 | 9 | NUM_BATCH = 1000 10 | TEST_FREQ = 2 11 | 12 | num_test_episodes = 100 13 | 14 | def run_HAC(FLAGS,env,agent): 15 | 16 | # Print task summary 17 | print_summary(FLAGS,env) 18 | 19 | # Determine training mode.
If not testing and not solely training, interleave training and testing to track progress 20 | mix_train_test = False 21 | if not FLAGS.test and not FLAGS.train_only: 22 | mix_train_test = True 23 | 24 | # Track total training episodes completed 25 | total_episodes = 0 26 | 27 | for batch in range(NUM_BATCH): 28 | 29 | num_episodes = agent.other_params["num_exploration_episodes"] 30 | 31 | # Evaluate policy every TEST_FREQ batches if interleaving training and testing 32 | if mix_train_test and batch % TEST_FREQ == 0: 33 | print("\n--- TESTING ---") 34 | agent.FLAGS.test = True 35 | num_episodes = num_test_episodes 36 | 37 | # Reset successful episode counter 38 | successful_episodes = 0 39 | 40 | for episode in range(num_episodes): 41 | 42 | print("\nBatch %d, Episode %d" % (batch, episode)) 43 | 44 | # Train for an episode 45 | success = agent.train(env, episode, total_episodes) 46 | 47 | if FLAGS.train_only or (mix_train_test and batch % TEST_FREQ != 0): 48 | total_episodes += 1 49 | 50 | if success: 51 | print("Batch %d, Episode %d End Goal Achieved\n" % (batch, episode)) 52 | 53 | # Increment successful episode counter if applicable 54 | if mix_train_test and batch % TEST_FREQ == 0: 55 | successful_episodes += 1 56 | 57 | # Save agent 58 | agent.save_model(episode) 59 | 60 | # Finish evaluating policy if tested prior batch 61 | if mix_train_test and batch % TEST_FREQ == 0: 62 | 63 | # Log performance 64 | success_rate = successful_episodes / num_test_episodes * 100 65 | print("\nTesting Success Rate %.2f%%" % success_rate) 66 | agent.log_performance(success_rate) 67 | agent.FLAGS.test = False 68 | 69 | print("\n--- END TESTING ---\n") 70 | -------------------------------------------------------------------------------- /ant_environments/ant_reacher_3_levels/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | 4 | def layer(input_layer, num_next_neurons, is_output=False): 5 | num_prev_neurons = int(input_layer.shape[1]) 6 | shape = [num_prev_neurons, num_next_neurons] 7 | 8 | if is_output: 9 | weight_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3) 10 | bias_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3) 11 | else: 12 | # 1/sqrt(f) 13 | fan_in_init = 1 / num_prev_neurons ** 0.5 14 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 15 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 16 | 17 | weights = tf.get_variable("weights", shape, initializer=weight_init) 18 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init) 19 | 20 | dot = tf.matmul(input_layer, weights) + biases 21 | 22 | if is_output: 23 | return dot 24 | 25 | relu = tf.nn.relu(dot) 26 | return relu 27 | 28 | def layer_goal_nn(input_layer, num_next_neurons, is_output=False): 29 | num_prev_neurons = int(input_layer.shape[1]) 30 | shape = [num_prev_neurons, num_next_neurons] 31 | 32 | 33 | fan_in_init = 1 / num_prev_neurons ** 0.5 34 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 35 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 36 | 37 | weights = tf.get_variable("weights", shape, initializer=weight_init) 38 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init) 39 | 40 | dot = tf.matmul(input_layer, weights) + biases 41 | 42 | if is_output: 43 | return dot 44 | 45 | relu = tf.nn.relu(dot) 46 
| return relu 47 | 48 | 49 | # Below function prints out options and environment specified by user 50 | def print_summary(FLAGS,env): 51 | 52 | print("\n- - - - - - - - - - -") 53 | print("Task Summary: ","\n") 54 | print("Environment: ", env.name) 55 | print("Number of Layers: ", FLAGS.layers) 56 | print("Time Limit per Layer: ", FLAGS.time_scale) 57 | print("Max Episode Time Steps: ", env.max_actions) 58 | print("Retrain: ", FLAGS.retrain) 59 | print("Test: ", FLAGS.test) 60 | print("Visualize: ", FLAGS.show) 61 | print("- - - - - - - - - - -", "\n\n") 62 | 63 | 64 | # Below function ensures environment configurations were properly entered 65 | def check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds, initial_state_space, subgoal_bounds, subgoal_thresholds, max_actions, timesteps_per_action): 66 | 67 | # Ensure model file is an ".xml" file 68 | assert model_name[-4:] == ".xml", "Mujoco model must be an \".xml\" file" 69 | 70 | # Ensure upper bounds of range is >= lower bound of range 71 | if goal_space_train is not None: 72 | for i in range(len(goal_space_train)): 73 | assert goal_space_train[i][1] >= goal_space_train[i][0], "In the training goal space, upper bound must be >= lower bound" 74 | 75 | if goal_space_test is not None: 76 | for i in range(len(goal_space_test)): 77 | assert goal_space_test[i][1] >= goal_space_test[i][0], "In the testing goal space, upper bound must be >= lower bound" 78 | 79 | for i in range(len(initial_state_space)): 80 | assert initial_state_space[i][1] >= initial_state_space[i][0], "In initial state space, upper bound must be >= lower bound" 81 | 82 | for i in range(len(subgoal_bounds)): 83 | assert subgoal_bounds[i][1] >= subgoal_bounds[i][0], "In subgoal space, upper bound must be >= lower bound" 84 | 85 | # Make sure end goal spaces and thresholds have same first dimension 86 | if goal_space_train is not None and goal_space_test is not None: 87 | assert len(goal_space_train) == len(goal_space_test) == len(end_goal_thresholds), "End goal space and thresholds must have same first dimension" 88 | 89 | # Make sure subgoal spaces and thresholds have same dimensions 90 | assert len(subgoal_bounds) == len(subgoal_thresholds), "Subgoal space and thresholds must have same first dimension" 91 | 92 | # Ensure max_actions and timesteps_per_action are positive integers 93 | assert max_actions > 0, "Max actions should be a positive integer" 94 | 95 | assert timesteps_per_action > 0, "Timesteps per action should be a positive integer" 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /experience_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class ExperienceBuffer(): 4 | 5 | def __init__(self, max_buffer_size, batch_size): 6 | self.size = 0 7 | self.max_buffer_size = max_buffer_size 8 | self.experiences = [] 9 | self.batch_size = batch_size 10 | 11 | def add(self, experience): 12 | assert len(experience) == 7, 'Experience must be of form (s, a, r, s, g, t, grip_info)' 13 | assert type(experience[5]) == bool 14 | 15 | self.experiences.append(experience) 16 | self.size += 1 17 | 18 | # If replay buffer is filled, remove a percentage of the replay buffer; removing only a single transition at a time slows down performance
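# (Illustrative note, not part of the original source.) For example, with max_buffer_size = 600,
# the eviction below trims the oldest int(np.floor(600 / 6)) = 100 transitions in a single slice
# once the buffer fills, so the buffer is never shrunk one element at a time on every add().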
19 | if self.size >= self.max_buffer_size: 20 | beg_index = int(np.floor(self.max_buffer_size/6)) 21 | self.experiences = self.experiences[beg_index:] 22 | self.size -= beg_index 23 | 24 | def get_batch(self): 25 | states, actions, rewards, new_states, goals, is_terminals = [], [], [], [], [], [] 26 | dist = np.random.randint(0, high=self.size, size=self.batch_size) 27 | 28 | for i in dist: 29 | states.append(self.experiences[i][0]) 30 | actions.append(self.experiences[i][1]) 31 | rewards.append(self.experiences[i][2]) 32 | new_states.append(self.experiences[i][3]) 33 | goals.append(self.experiences[i][4]) 34 | is_terminals.append(self.experiences[i][5]) 35 | 36 | return states, actions, rewards, new_states, goals, is_terminals 37 | -------------------------------------------------------------------------------- /initialize_HAC.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the starting file for the Hierarchical Actor-Critic (HAC) algorithm. The below script processes the command-line options specified 3 | by the user and instantiates the environment and agent. 4 | """ 5 | 6 | from design_agent_and_env import design_agent_and_env 7 | from options import parse_options 8 | from agent import Agent 9 | from run_HAC import run_HAC 10 | 11 | # Determine training options specified by user. The full list of available options can be found in the "options.py" file. 12 | FLAGS = parse_options() 13 | 14 | # Instantiate the agent and Mujoco environment. The designer must assign values to the hyperparameters listed in the "design_agent_and_env.py" file. 15 | agent, env = design_agent_and_env(FLAGS) 16 | 17 | # Begin training 18 | run_HAC(FLAGS,env,agent) 19 | -------------------------------------------------------------------------------- /mujoco_files/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The dm_control Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================ 15 | 16 | """Functions to manage the common assets for domains.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | from dm_control.utils import resources 24 | 25 | _SUITE_DIR = os.path.dirname(os.path.dirname(__file__)) 26 | _FILENAMES = [ 27 | "common/materials.xml", 28 | "common/skybox.xml", 29 | "common/visual.xml", 30 | ] 31 | 32 | ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename)) 33 | for filename in _FILENAMES} 34 | 35 | 36 | def read_model(model_filename): 37 | """Reads a model XML file and returns its contents as a string.""" 38 | return resources.GetResource(os.path.join(_SUITE_DIR, model_filename)) 39 | -------------------------------------------------------------------------------- /mujoco_files/common/materials.xml: -------------------------------------------------------------------------------- [23-line MuJoCo material and texture asset; the XML markup was lost during extraction] -------------------------------------------------------------------------------- /mujoco_files/common/skybox.xml: -------------------------------------------------------------------------------- [7-line MuJoCo skybox texture asset; the XML markup was lost during extraction] -------------------------------------------------------------------------------- /mujoco_files/common/visual.xml: -------------------------------------------------------------------------------- [8-line MuJoCo visual settings asset; the XML markup was lost during extraction] -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/c_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_base.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/c_forearm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_forearm.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_L.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_robotiq_85_gripper_joint_3_R.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/c_shoulder.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_shoulder.stl --------------------------------------------------------------------------------
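The mujoco_files/common/__init__.py module shown above packages the three shared XML assets into an ASSETS dict and exposes read_model() for reading a model file as a string. A minimal usage sketch follows; the package import path, the choice of model file, and the dm_control loading call are illustrative assumptions and not code from this repository:

    # Hypothetical usage of ASSETS and read_model from mujoco_files/common/__init__.py.
    from dm_control import mujoco                          # assumes dm_control is installed
    from mujoco_files.common import ASSETS, read_model     # assumed import path

    xml_string = read_model("pendulum.xml")   # resolved relative to the mujoco_files/ directory
    # Any includes or mesh files referenced by the model must also be available via the assets dict.
    physics = mujoco.Physics.from_xml_string(xml_string, assets=ASSETS)
    print(physics.model.nbody)                # number of bodies in the compiled model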
/mujoco_files/muj_gripper/c_upperarm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_upperarm.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/c_wrist1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_wrist1.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/c_wrist2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_wrist2.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/c_wrist3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/c_wrist3.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/glass_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/glass_cup.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/glass_cup_2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/glass_cup_2.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/glass_cup_3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/glass_cup_3.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/inner_finger_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/inner_finger_coarse.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/inner_finger_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/inner_finger_fine.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/inner_knuckle_coarse.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/inner_knuckle_coarse.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/inner_knuckle_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/inner_knuckle_fine.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/new_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/new_solo_cup.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/outer_finger_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/outer_finger_coarse.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/outer_finger_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/outer_finger_fine.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/outer_knuckle_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/outer_knuckle_coarse.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/outer_knuckle_fine.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/outer_knuckle_fine.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/red_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/red_solo_cup.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/robotiq_85_base_link_coarse.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/robotiq_85_base_link_fine.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/smaller_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/smaller_solo_cup.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/solo_cup.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/upd_solo_cup.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/upd_solo_cup.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/v_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_base.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/v_forearm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_forearm.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_L.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_robotiq_85_gripper_joint_3_R.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/v_shoulder.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_shoulder.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/v_upperarm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_upperarm.stl 
-------------------------------------------------------------------------------- /mujoco_files/muj_gripper/v_wrist1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_wrist1.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/v_wrist2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_wrist2.stl -------------------------------------------------------------------------------- /mujoco_files/muj_gripper/v_wrist3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrew-j-levy/Hierarchical-Actor-Critc-HAC-/e2402577991d3522206ec40e3dc95e485f1597b7/mujoco_files/muj_gripper/v_wrist3.stl -------------------------------------------------------------------------------- /mujoco_files/pendulum.xml: -------------------------------------------------------------------------------- [86-line MuJoCo XML model of the pendulum task; the markup was lost during extraction and is not reproduced here] -------------------------------------------------------------------------------- /options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | """ 4 | Below are the training options the user can specify on the command line. 5 | 6 | Options Include: 7 | 8 | 1. Retrain boolean 9 | - If included, actor and critic neural network parameters are reset 10 | 11 | 2. Testing boolean 12 | - If included, agent only uses greedy policy without noise. No changes are made to policy and neural networks. 13 | - If not included, periods of training are by default interleaved with periods of testing to evaluate progress. 14 | 15 | 3. Show boolean 16 | - If included, training will be visualized 17 | 18 | 4. Train Only boolean 19 | - If included, agent will be solely in training mode and will not interleave periods of training and testing 20 | 21 | 5.
Verbosity boolean 22 | - If included, summary of each transition will be printed 23 | """ 24 | 25 | def parse_options(): 26 | parser = argparse.ArgumentParser() 27 | 28 | parser.add_argument( 29 | '--retrain', 30 | action='store_true', 31 | help='Include to reset policy' 32 | ) 33 | 34 | parser.add_argument( 35 | '--test', 36 | action='store_true', 37 | help='Include to fix current policy' 38 | ) 39 | 40 | parser.add_argument( 41 | '--show', 42 | action='store_true', 43 | help='Include to visualize training' 44 | ) 45 | 46 | parser.add_argument( 47 | '--train_only', 48 | action='store_true', 49 | help='Include to use training mode only' 50 | ) 51 | 52 | parser.add_argument( 53 | '--verbose', 54 | action='store_true', 55 | help='Print summary of each transition' 56 | ) 57 | 58 | FLAGS, unparsed = parser.parse_known_args() 59 | 60 | 61 | return FLAGS 62 | -------------------------------------------------------------------------------- /run_HAC.py: -------------------------------------------------------------------------------- 1 | """ 2 | "run_HAC.py" executes the training schedule for the agent. By default, the agent will alternate between exploration and testing phases. The number of episodes in the exploration phase can be configured in section 3 of "design_agent_and_env.py" file. If the user prefers to only explore or only test, the user can enter the command-line options ""--train_only" or "--test", respectively. The full list of command-line options is available in the "options.py" file. 3 | """ 4 | 5 | import pickle as cpickle 6 | import agent as Agent 7 | from utils import print_summary 8 | 9 | NUM_BATCH = 1000 10 | TEST_FREQ = 2 11 | 12 | num_test_episodes = 100 13 | 14 | def run_HAC(FLAGS,env,agent): 15 | 16 | # Print task summary 17 | print_summary(FLAGS,env) 18 | 19 | # Determine training mode. 
If not testing and not solely training, interleave training and testing to track progress 20 | mix_train_test = False 21 | if not FLAGS.test and not FLAGS.train_only: 22 | mix_train_test = True 23 | 24 | for batch in range(NUM_BATCH): 25 | 26 | num_episodes = agent.other_params["num_exploration_episodes"] 27 | 28 | # Evaluate policy every TEST_FREQ batches if interleaving training and testing 29 | if mix_train_test and batch % TEST_FREQ == 0: 30 | print("\n--- TESTING ---") 31 | agent.FLAGS.test = True 32 | num_episodes = num_test_episodes 33 | 34 | # Reset successful episode counter 35 | successful_episodes = 0 36 | 37 | for episode in range(num_episodes): 38 | 39 | print("\nBatch %d, Episode %d" % (batch, episode)) 40 | 41 | # Train for an episode 42 | success = agent.train(env, episode) 43 | 44 | if success: 45 | print("Batch %d, Episode %d End Goal Achieved\n" % (batch, episode)) 46 | 47 | # Increment successful episode counter if applicable 48 | if mix_train_test and batch % TEST_FREQ == 0: 49 | successful_episodes += 1 50 | 51 | # Save agent 52 | agent.save_model(episode) 53 | 54 | # Finish evaluating policy if tested prior batch 55 | if mix_train_test and batch % TEST_FREQ == 0: 56 | 57 | # Log performance 58 | success_rate = successful_episodes / num_test_episodes * 100 59 | print("\nTesting Success Rate %.2f%%" % success_rate) 60 | agent.log_performance(success_rate) 61 | agent.FLAGS.test = False 62 | 63 | print("\n--- END TESTING ---\n") 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | 4 | def layer(input_layer, num_next_neurons, is_output=False): 5 | num_prev_neurons = int(input_layer.shape[1]) 6 | shape = [num_prev_neurons, num_next_neurons] 7 | 8 | if is_output: 9 | weight_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3) 10 | bias_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3) 11 | else: 12 | # 1/sqrt(f) 13 | fan_in_init = 1 / num_prev_neurons ** 0.5 14 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 15 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 16 | 17 | weights = tf.get_variable("weights", shape, initializer=weight_init) 18 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init) 19 | 20 | dot = tf.matmul(input_layer, weights) + biases 21 | 22 | if is_output: 23 | return dot 24 | 25 | relu = tf.nn.relu(dot) 26 | return relu 27 | 28 | def layer_goal_nn(input_layer, num_next_neurons, is_output=False): 29 | num_prev_neurons = int(input_layer.shape[1]) 30 | shape = [num_prev_neurons, num_next_neurons] 31 | 32 | 33 | fan_in_init = 1 / num_prev_neurons ** 0.5 34 | weight_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 35 | bias_init = tf.random_uniform_initializer(minval=-fan_in_init, maxval=fan_in_init) 36 | 37 | weights = tf.get_variable("weights", shape, initializer=weight_init) 38 | biases = tf.get_variable("biases", [num_next_neurons], initializer=bias_init) 39 | 40 | dot = tf.matmul(input_layer, weights) + biases 41 | 42 | if is_output: 43 | return dot 44 | 45 | relu = tf.nn.relu(dot) 46 | return relu 47 | 48 | 49 | # Below function prints out options and environment specified by user 50 | def print_summary(FLAGS,env): 51 | 52 | print("\n- - - - - - - - - - -") 53 | print("Task 
Summary: ","\n") 54 | print("Environment: ", env.name) 55 | print("Number of Layers: ", FLAGS.layers) 56 | print("Time Limit per Layer: ", FLAGS.time_scale) 57 | print("Max Episode Time Steps: ", env.max_actions) 58 | print("Retrain: ", FLAGS.retrain) 59 | print("Test: ", FLAGS.test) 60 | print("Visualize: ", FLAGS.show) 61 | print("- - - - - - - - - - -", "\n\n") 62 | 63 | 64 | # Below function ensures environment configurations were properly entered 65 | def check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds, initial_state_space, subgoal_bounds, subgoal_thresholds, max_actions, timesteps_per_action): 66 | 67 | # Ensure model file is an ".xml" file 68 | assert model_name[-4:] == ".xml", "Mujoco model must be an \".xml\" file" 69 | 70 | # Ensure upper bounds of range is >= lower bound of range 71 | if goal_space_train is not None: 72 | for i in range(len(goal_space_train)): 73 | assert goal_space_train[i][1] >= goal_space_train[i][0], "In the training goal space, upper bound must be >= lower bound" 74 | 75 | if goal_space_test is not None: 76 | for i in range(len(goal_space_test)): 77 | assert goal_space_test[i][1] >= goal_space_test[i][0], "In the testing goal space, upper bound must be >= lower bound" 78 | 79 | for i in range(len(initial_state_space)): 80 | assert initial_state_space[i][1] >= initial_state_space[i][0], "In initial state space, upper bound must be >= lower bound" 81 | 82 | for i in range(len(subgoal_bounds)): 83 | assert subgoal_bounds[i][1] >= subgoal_bounds[i][0], "In subgoal space, upper bound must be >= lower bound" 84 | 85 | # Make sure end goal spaces and thresholds have same first dimension 86 | if goal_space_train is not None and goal_space_test is not None: 87 | assert len(goal_space_train) == len(goal_space_test) == len(end_goal_thresholds), "End goal space and thresholds must have same first dimension" 88 | 89 | # Make sure subgoal spaces and thresholds have same dimensions 90 | assert len(subgoal_bounds) == len(subgoal_thresholds), "Subgoal space and thresholds must have same first dimension" 91 | 92 | # Ensure max_actions and timesteps_per_action are positive integers 93 | assert max_actions > 0, "Max actions should be a positive integer" 94 | 95 | assert timesteps_per_action > 0, "Timesteps per action should be a positive integer" 96 | 97 | 98 | 99 | --------------------------------------------------------------------------------
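To make the argument shapes that check_validity expects concrete, here is a minimal, hypothetical call. The values below are illustrative placeholders only (the real configuration for each task is defined in design_agent_and_env.py); they are chosen so that every assertion above passes.

    import numpy as np
    from utils import check_validity

    # Two-dimensional goal and subgoal spaces (e.g., a joint angle and a joint velocity); purely illustrative.
    model_name = "pendulum.xml"
    goal_space_train = [[-np.pi/4, np.pi/4], [-0.6, 0.6]]
    goal_space_test = [[0.0, 0.0], [0.0, 0.0]]
    end_goal_thresholds = [np.deg2rad(9.5), 0.6]
    initial_state_space = [[np.pi/4, 7*np.pi/4], [-0.05, 0.05]]
    subgoal_bounds = [[-np.pi, np.pi], [-15.0, 15.0]]
    subgoal_thresholds = [np.deg2rad(9.5), 0.6]
    max_actions = 1000
    timesteps_per_action = 10

    # Raises an AssertionError if any bound, threshold dimension, or count is inconsistent.
    check_validity(model_name, goal_space_train, goal_space_test, end_goal_thresholds,
                   initial_state_space, subgoal_bounds, subgoal_thresholds,
                   max_actions, timesteps_per_action)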