├── LICENSE ├── README.md ├── agents └── bdq │ ├── README.md │ ├── __init__.py │ ├── common │ ├── __init__.py │ ├── atari_wrappers_deprecated.py │ ├── azure_utils.py │ ├── misc_util.py │ ├── schedules.py │ ├── segment_tree.py │ └── tf_util.py │ ├── deepq │ ├── __init__.py │ ├── build_graph.py │ ├── models.py │ ├── procedure_continuous_tasks.py │ └── replay_buffer.py │ ├── enjoy_continuous.py │ ├── logger.py │ ├── train_continuous.py │ └── trained_models │ ├── Hopper-v1 │ └── Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_11-02-56_Hopper-v1.pkl │ ├── Humanoid-v1 │ └── Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-01-29_11-52-42_Humanoid-v1.pkl │ ├── Reacher-v1 │ └── Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_01-01-07_Reacher-v1.pkl │ ├── Reacher3DOF-v0 │ └── Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_00-49-32_Reacher3DOF-v0.pkl │ ├── Reacher4DOF-v0 │ └── Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_00-50-30_Reacher4DOF-v0.pkl │ ├── Reacher5DOF-v0 │ └── Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_00-51-17_Reacher5DOF-v0.pkl │ ├── Reacher6DOF-v0 │ └── Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_01-03-10_Reacher6DOF-v0.pkl │ └── Walker2d-v1 │ └── Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_13-08-52_Walker2d-v1.pkl ├── data ├── BDQ_Hopper-v1.gif ├── BDQ_Humanoid-v1.gif ├── BDQ_Reacher-v1.gif ├── BDQ_Reacher3DOF-v0.gif ├── BDQ_Reacher4DOF-v0.gif ├── BDQ_Reacher5DOF-v0.gif ├── BDQ_Reacher6DOF-v0.gif ├── BDQ_Walker2d-v1.gif ├── action_branching_architecture.png └── bdq_network.png └── envs ├── __init__.py └── mujoco ├── assets ├── reacher_3dof.xml ├── reacher_4dof.xml ├── reacher_5dof.xml └── reacher_6dof.xml ├── reacher_3dof.py ├── reacher_4dof.py ├── reacher_5dof.py └── reacher_6dof.py /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/README.md -------------------------------------------------------------------------------- /agents/bdq/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/README.md -------------------------------------------------------------------------------- /agents/bdq/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agents/bdq/common/__init__.py: -------------------------------------------------------------------------------- 1 | from bdq.common.misc_util import * 2 | -------------------------------------------------------------------------------- /agents/bdq/common/atari_wrappers_deprecated.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/common/atari_wrappers_deprecated.py -------------------------------------------------------------------------------- /agents/bdq/common/azure_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/common/azure_utils.py -------------------------------------------------------------------------------- /agents/bdq/common/misc_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/common/misc_util.py -------------------------------------------------------------------------------- /agents/bdq/common/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/common/schedules.py -------------------------------------------------------------------------------- /agents/bdq/common/segment_tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/common/segment_tree.py -------------------------------------------------------------------------------- /agents/bdq/common/tf_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/common/tf_util.py -------------------------------------------------------------------------------- /agents/bdq/deepq/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/deepq/__init__.py -------------------------------------------------------------------------------- /agents/bdq/deepq/build_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/deepq/build_graph.py -------------------------------------------------------------------------------- /agents/bdq/deepq/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/deepq/models.py -------------------------------------------------------------------------------- /agents/bdq/deepq/procedure_continuous_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/deepq/procedure_continuous_tasks.py -------------------------------------------------------------------------------- /agents/bdq/deepq/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/deepq/replay_buffer.py -------------------------------------------------------------------------------- /agents/bdq/enjoy_continuous.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/enjoy_continuous.py -------------------------------------------------------------------------------- /agents/bdq/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/logger.py -------------------------------------------------------------------------------- /agents/bdq/train_continuous.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/train_continuous.py -------------------------------------------------------------------------------- /agents/bdq/trained_models/Hopper-v1/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_11-02-56_Hopper-v1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/trained_models/Hopper-v1/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_11-02-56_Hopper-v1.pkl -------------------------------------------------------------------------------- /agents/bdq/trained_models/Humanoid-v1/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-01-29_11-52-42_Humanoid-v1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/trained_models/Humanoid-v1/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-01-29_11-52-42_Humanoid-v1.pkl -------------------------------------------------------------------------------- /agents/bdq/trained_models/Reacher-v1/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_01-01-07_Reacher-v1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/trained_models/Reacher-v1/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_01-01-07_Reacher-v1.pkl -------------------------------------------------------------------------------- /agents/bdq/trained_models/Reacher3DOF-v0/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_00-49-32_Reacher3DOF-v0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/trained_models/Reacher3DOF-v0/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_00-49-32_Reacher3DOF-v0.pkl -------------------------------------------------------------------------------- /agents/bdq/trained_models/Reacher4DOF-v0/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_00-50-30_Reacher4DOF-v0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/trained_models/Reacher4DOF-v0/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_00-50-30_Reacher4DOF-v0.pkl -------------------------------------------------------------------------------- /agents/bdq/trained_models/Reacher5DOF-v0/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_00-51-17_Reacher5DOF-v0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/trained_models/Reacher5DOF-v0/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_00-51-17_Reacher5DOF-v0.pkl -------------------------------------------------------------------------------- /agents/bdq/trained_models/Reacher6DOF-v0/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_01-03-10_Reacher6DOF-v0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/trained_models/Reacher6DOF-v0/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_01-03-10_Reacher6DOF-v0.pkl -------------------------------------------------------------------------------- /agents/bdq/trained_models/Walker2d-v1/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_13-08-52_Walker2d-v1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/agents/bdq/trained_models/Walker2d-v1/Branching_Dueling-reduceLocalMean_TD-target-mean_TD-errors-aggregation-v2_granularity-33_2018-02-01_13-08-52_Walker2d-v1.pkl -------------------------------------------------------------------------------- /data/BDQ_Hopper-v1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/data/BDQ_Hopper-v1.gif -------------------------------------------------------------------------------- /data/BDQ_Humanoid-v1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/data/BDQ_Humanoid-v1.gif -------------------------------------------------------------------------------- /data/BDQ_Reacher-v1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/data/BDQ_Reacher-v1.gif -------------------------------------------------------------------------------- /data/BDQ_Reacher3DOF-v0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/data/BDQ_Reacher3DOF-v0.gif -------------------------------------------------------------------------------- /data/BDQ_Reacher4DOF-v0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/data/BDQ_Reacher4DOF-v0.gif -------------------------------------------------------------------------------- /data/BDQ_Reacher5DOF-v0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/data/BDQ_Reacher5DOF-v0.gif -------------------------------------------------------------------------------- /data/BDQ_Reacher6DOF-v0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/data/BDQ_Reacher6DOF-v0.gif -------------------------------------------------------------------------------- /data/BDQ_Walker2d-v1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/data/BDQ_Walker2d-v1.gif -------------------------------------------------------------------------------- /data/action_branching_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/data/action_branching_architecture.png -------------------------------------------------------------------------------- /data/bdq_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/data/bdq_network.png -------------------------------------------------------------------------------- /envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/envs/__init__.py -------------------------------------------------------------------------------- /envs/mujoco/assets/reacher_3dof.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/envs/mujoco/assets/reacher_3dof.xml -------------------------------------------------------------------------------- /envs/mujoco/assets/reacher_4dof.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/envs/mujoco/assets/reacher_4dof.xml -------------------------------------------------------------------------------- /envs/mujoco/assets/reacher_5dof.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/envs/mujoco/assets/reacher_5dof.xml -------------------------------------------------------------------------------- /envs/mujoco/assets/reacher_6dof.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/envs/mujoco/assets/reacher_6dof.xml -------------------------------------------------------------------------------- /envs/mujoco/reacher_3dof.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/envs/mujoco/reacher_3dof.py -------------------------------------------------------------------------------- /envs/mujoco/reacher_4dof.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/envs/mujoco/reacher_4dof.py -------------------------------------------------------------------------------- /envs/mujoco/reacher_5dof.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/envs/mujoco/reacher_5dof.py -------------------------------------------------------------------------------- /envs/mujoco/reacher_6dof.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atavakol/action-branching-agents/HEAD/envs/mujoco/reacher_6dof.py --------------------------------------------------------------------------------