├── .circleci
└── config.yml
├── .gitignore
├── .nojekyll
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── __init__.py
├── benchmarks
├── README.md
├── a3c
│ ├── README.md
│ ├── ant_a3c_16_workers.png
│ ├── half_cheetah_a3c_16_workers.png
│ ├── hopper_a3c_16_workers.png
│ ├── inverted_pendulum_a3c.png
│ ├── space_invaders_a3c_16_workers.png
│ └── walker2d_a3c_16_workers.png
├── acer
│ ├── README.md
│ ├── breakout_acer_16_workers.png
│ ├── pong_acer_16_workers.png
│ └── space_invaders_acer_16_workers.png
├── bootstrapped_dqn
│ ├── README.md
│ ├── breakout_bootstrapped_dqn.png
│ ├── pong_bootstrapped_dqn.png
│ └── space_invaders_bootstrapped_dqn.png
├── clipped_ppo
│ ├── README.md
│ ├── ant_clipped_ppo.png
│ ├── half_cheetah_clipped_ppo.png
│ ├── hopper_clipped_ppo.png
│ ├── humanoid_clipped_ppo.png
│ ├── inverted_double_pendulum_clipped_ppo.png
│ ├── inverted_pendulum_clipped_ppo.png
│ ├── reacher_clipped_ppo.png
│ ├── swimmer_clipped_ppo.png
│ └── walker2d_clipped_ppo.png
├── ddpg
│ ├── README.md
│ ├── ant_ddpg.png
│ ├── half_cheetah_ddpg.png
│ ├── hopper_ddpg.png
│ ├── humanoid_ddpg.png
│ ├── inverted_double_pendulum_ddpg.png
│ ├── inverted_pendulum_ddpg.png
│ ├── reacher_ddpg.png
│ ├── swimmer_ddpg.png
│ └── walker2d_ddpg.png
├── ddpg_her
│ ├── README.md
│ ├── fetch_ddpg_her_pick_and_place_8_workers.png
│ ├── fetch_ddpg_her_push_8_workers.png
│ ├── fetch_ddpg_her_reach_1_worker.png
│ └── fetch_ddpg_her_slide_8_workers.png
├── dfp
│ ├── README.md
│ ├── doom_basic_dfp_8_workers.png
│ ├── doom_health_dfp_8_workers.png
│ └── doom_health_supreme_dfp_8_workers.png
├── dqn
│ ├── README.md
│ ├── breakout_dqn.png
│ ├── pong_dqn.png
│ └── space_invaders_dqn.png
├── dueling_ddqn
│ ├── README.md
│ ├── breakout_dueling_ddqn.png
│ ├── pong_dueling_ddqn.png
│ └── space_invaders_dueling_ddqn.png
├── dueling_ddqn_with_per
│ ├── README.md
│ ├── breakout_dueling_ddqn_with_per.png
│ ├── pong_dueling_ddqn_with_per.png
│ └── space_invaders_dueling_ddqn_with_per.png
├── qr_dqn
│ ├── README.md
│ ├── breakout_qr_dqn.png
│ └── pong_qr_dqn.png
├── sac
│ ├── README.md
│ ├── half_cheetah_sac.png
│ ├── hopper_sac.png
│ ├── humanoid_sac.png
│ ├── inverted_pendulum_sac.png
│ └── walker2d_sac.png
└── td3
│ ├── README.md
│ ├── ant.png
│ ├── half_cheetah.png
│ ├── hopper.png
│ ├── reacher.png
│ └── walker2d.png
├── dist-coach-config.template
├── docker
├── Dockerfile
├── Dockerfile.base
├── Dockerfile.doom_environment
├── Dockerfile.gym_environment
├── Dockerfile.mujoco_environment
├── Dockerfile.starcraft_environment
├── Makefile
└── README.md
├── docs
├── .nojekyll
├── _images
│ ├── ac.png
│ ├── acer.png
│ ├── act.png
│ ├── algorithms.png
│ ├── attention_discretization.png
│ ├── bollinger_bands.png
│ ├── box_discretization.png
│ ├── box_masking.png
│ ├── bs_dqn.png
│ ├── cil.png
│ ├── compare_by_num_episodes.png
│ ├── compare_by_time.png
│ ├── ddpg.png
│ ├── design.png
│ ├── dfp.png
│ ├── distributed.png
│ ├── distributional_dqn.png
│ ├── dqn.png
│ ├── dueling_dqn.png
│ ├── filters.png
│ ├── full_discrete_action_space_map.png
│ ├── horizontal-scale-out.png
│ ├── improve.png
│ ├── linear_box_to_box_map.png
│ ├── naf.png
│ ├── nec.png
│ ├── network.png
│ ├── observe.png
│ ├── partial_discrete_action_space_map.png
│ ├── pg.png
│ ├── ppo.png
│ ├── qr_dqn.png
│ ├── rainbow.png
│ ├── sac.png
│ ├── separate_signals.png
│ ├── td3.png
│ ├── train.png
│ ├── updating_dynamically.gif
│ └── wolpertinger.png
├── _modules
│ ├── index.html
│ └── rl_coach
│ │ ├── agents
│ │ ├── acer_agent.html
│ │ ├── actor_critic_agent.html
│ │ ├── agent.html
│ │ ├── agent_interface.html
│ │ ├── bc_agent.html
│ │ ├── categorical_dqn_agent.html
│ │ ├── cil_agent.html
│ │ ├── clipped_ppo_agent.html
│ │ ├── ddpg_agent.html
│ │ ├── dfp_agent.html
│ │ ├── dqn_agent.html
│ │ ├── mmc_agent.html
│ │ ├── n_step_q_agent.html
│ │ ├── naf_agent.html
│ │ ├── nec_agent.html
│ │ ├── pal_agent.html
│ │ ├── policy_gradients_agent.html
│ │ ├── ppo_agent.html
│ │ ├── qr_dqn_agent.html
│ │ ├── rainbow_dqn_agent.html
│ │ ├── soft_actor_critic_agent.html
│ │ ├── td3_agent.html
│ │ ├── value_optimization_agent.html
│ │ └── wolpertinger_agent.html
│ │ ├── architectures
│ │ ├── architecture.html
│ │ └── network_wrapper.html
│ │ ├── base_parameters.html
│ │ ├── core_types.html
│ │ ├── data_stores
│ │ ├── nfs_data_store.html
│ │ └── s3_data_store.html
│ │ ├── environments
│ │ ├── carla_environment.html
│ │ ├── control_suite_environment.html
│ │ ├── doom_environment.html
│ │ ├── environment.html
│ │ ├── gym_environment.html
│ │ └── starcraft2_environment.html
│ │ ├── exploration_policies
│ │ ├── additive_noise.html
│ │ ├── boltzmann.html
│ │ ├── bootstrapped.html
│ │ ├── categorical.html
│ │ ├── continuous_entropy.html
│ │ ├── e_greedy.html
│ │ ├── exploration_policy.html
│ │ ├── greedy.html
│ │ ├── ou_process.html
│ │ ├── parameter_noise.html
│ │ ├── truncated_normal.html
│ │ └── ucb.html
│ │ ├── filters
│ │ ├── action
│ │ │ ├── attention_discretization.html
│ │ │ ├── box_discretization.html
│ │ │ ├── box_masking.html
│ │ │ ├── full_discrete_action_space_map.html
│ │ │ ├── linear_box_to_box_map.html
│ │ │ └── partial_discrete_action_space_map.html
│ │ ├── observation
│ │ │ ├── observation_clipping_filter.html
│ │ │ ├── observation_crop_filter.html
│ │ │ ├── observation_move_axis_filter.html
│ │ │ ├── observation_normalization_filter.html
│ │ │ ├── observation_reduction_by_sub_parts_name_filter.html
│ │ │ ├── observation_rescale_size_by_factor_filter.html
│ │ │ ├── observation_rescale_to_size_filter.html
│ │ │ ├── observation_rgb_to_y_filter.html
│ │ │ ├── observation_squeeze_filter.html
│ │ │ ├── observation_stacking_filter.html
│ │ │ └── observation_to_uint8_filter.html
│ │ └── reward
│ │ │ ├── reward_clipping_filter.html
│ │ │ ├── reward_normalization_filter.html
│ │ │ └── reward_rescale_filter.html
│ │ ├── memories
│ │ ├── backend
│ │ │ └── redis.html
│ │ ├── episodic
│ │ │ ├── episodic_experience_replay.html
│ │ │ ├── episodic_hindsight_experience_replay.html
│ │ │ ├── episodic_hrl_hindsight_experience_replay.html
│ │ │ └── single_episode_buffer.html
│ │ └── non_episodic
│ │ │ ├── balanced_experience_replay.html
│ │ │ ├── differentiable_neural_dictionary.html
│ │ │ ├── experience_replay.html
│ │ │ ├── prioritized_experience_replay.html
│ │ │ └── transition_collection.html
│ │ ├── orchestrators
│ │ └── kubernetes_orchestrator.html
│ │ └── spaces.html
├── _sources
│ ├── components
│ │ ├── additional_parameters.rst.txt
│ │ ├── agents
│ │ │ ├── imitation
│ │ │ │ ├── bc.rst.txt
│ │ │ │ └── cil.rst.txt
│ │ │ ├── index.rst.txt
│ │ │ ├── other
│ │ │ │ └── dfp.rst.txt
│ │ │ ├── policy_optimization
│ │ │ │ ├── ac.rst.txt
│ │ │ │ ├── acer.rst.txt
│ │ │ │ ├── cppo.rst.txt
│ │ │ │ ├── ddpg.rst.txt
│ │ │ │ ├── hac.rst.txt
│ │ │ │ ├── pg.rst.txt
│ │ │ │ ├── ppo.rst.txt
│ │ │ │ ├── sac.rst.txt
│ │ │ │ ├── td3.rst.txt
│ │ │ │ └── wolpertinger.rst.txt
│ │ │ └── value_optimization
│ │ │ │ ├── bs_dqn.rst.txt
│ │ │ │ ├── categorical_dqn.rst.txt
│ │ │ │ ├── double_dqn.rst.txt
│ │ │ │ ├── dqn.rst.txt
│ │ │ │ ├── dueling_dqn.rst.txt
│ │ │ │ ├── mmc.rst.txt
│ │ │ │ ├── n_step.rst.txt
│ │ │ │ ├── naf.rst.txt
│ │ │ │ ├── nec.rst.txt
│ │ │ │ ├── pal.rst.txt
│ │ │ │ ├── qr_dqn.rst.txt
│ │ │ │ └── rainbow.rst.txt
│ │ ├── architectures
│ │ │ └── index.rst.txt
│ │ ├── core_types.rst.txt
│ │ ├── data_stores
│ │ │ └── index.rst.txt
│ │ ├── environments
│ │ │ └── index.rst.txt
│ │ ├── exploration_policies
│ │ │ └── index.rst.txt
│ │ ├── filters
│ │ │ ├── index.rst.txt
│ │ │ ├── input_filters.rst.txt
│ │ │ └── output_filters.rst.txt
│ │ ├── memories
│ │ │ └── index.rst.txt
│ │ ├── memory_backends
│ │ │ └── index.rst.txt
│ │ ├── orchestrators
│ │ │ └── index.rst.txt
│ │ └── spaces.rst.txt
│ ├── contributing
│ │ ├── add_agent.rst.txt
│ │ └── add_env.rst.txt
│ ├── dashboard.rst.txt
│ ├── design
│ │ ├── control_flow.rst.txt
│ │ ├── horizontal_scaling.rst.txt
│ │ └── network.rst.txt
│ ├── dist_usage.rst.txt
│ ├── features
│ │ ├── algorithms.rst.txt
│ │ ├── batch_rl.rst.txt
│ │ ├── benchmarks.rst.txt
│ │ ├── environments.rst.txt
│ │ └── index.rst.txt
│ ├── index.rst.txt
│ ├── selecting_an_algorithm.rst.txt
│ ├── test.rst.txt
│ └── usage.rst.txt
├── _static
│ ├── basic.css
│ ├── css
│ │ ├── badge_only.css
│ │ ├── custom.css
│ │ └── theme.css
│ ├── dark_logo.png
│ ├── doctools.js
│ ├── documentation_options.js
│ ├── file.png
│ ├── fonts
│ │ ├── Inconsolata-Bold.ttf
│ │ ├── Inconsolata-Regular.ttf
│ │ ├── Inconsolata.ttf
│ │ ├── Lato-Bold.ttf
│ │ ├── Lato-Regular.ttf
│ │ ├── Lato
│ │ │ ├── lato-bold.eot
│ │ │ ├── lato-bold.ttf
│ │ │ ├── lato-bold.woff
│ │ │ ├── lato-bold.woff2
│ │ │ ├── lato-bolditalic.eot
│ │ │ ├── lato-bolditalic.ttf
│ │ │ ├── lato-bolditalic.woff
│ │ │ ├── lato-bolditalic.woff2
│ │ │ ├── lato-italic.eot
│ │ │ ├── lato-italic.ttf
│ │ │ ├── lato-italic.woff
│ │ │ ├── lato-italic.woff2
│ │ │ ├── lato-regular.eot
│ │ │ ├── lato-regular.ttf
│ │ │ ├── lato-regular.woff
│ │ │ └── lato-regular.woff2
│ │ ├── RobotoSlab-Bold.ttf
│ │ ├── RobotoSlab-Regular.ttf
│ │ ├── RobotoSlab
│ │ │ ├── roboto-slab-v7-bold.eot
│ │ │ ├── roboto-slab-v7-bold.ttf
│ │ │ ├── roboto-slab-v7-bold.woff
│ │ │ ├── roboto-slab-v7-bold.woff2
│ │ │ ├── roboto-slab-v7-regular.eot
│ │ │ ├── roboto-slab-v7-regular.ttf
│ │ │ ├── roboto-slab-v7-regular.woff
│ │ │ └── roboto-slab-v7-regular.woff2
│ │ ├── fontawesome-webfont.eot
│ │ ├── fontawesome-webfont.svg
│ │ ├── fontawesome-webfont.ttf
│ │ ├── fontawesome-webfont.woff
│ │ └── fontawesome-webfont.woff2
│ ├── jquery-3.2.1.js
│ ├── jquery.js
│ ├── js
│ │ ├── modernizr.min.js
│ │ └── theme.js
│ ├── language_data.js
│ ├── minus.png
│ ├── plus.png
│ ├── pygments.css
│ ├── searchtools.js
│ ├── underscore-1.3.1.js
│ └── underscore.js
├── components
│ ├── additional_parameters.html
│ ├── agents
│ │ ├── imitation
│ │ │ ├── bc.html
│ │ │ └── cil.html
│ │ ├── index.html
│ │ ├── other
│ │ │ └── dfp.html
│ │ ├── policy_optimization
│ │ │ ├── ac.html
│ │ │ ├── acer.html
│ │ │ ├── cppo.html
│ │ │ ├── ddpg.html
│ │ │ ├── hac.html
│ │ │ ├── pg.html
│ │ │ ├── ppo.html
│ │ │ ├── sac.html
│ │ │ ├── td3.html
│ │ │ └── wolpertinger.html
│ │ └── value_optimization
│ │ │ ├── bs_dqn.html
│ │ │ ├── categorical_dqn.html
│ │ │ ├── double_dqn.html
│ │ │ ├── dqn.html
│ │ │ ├── dueling_dqn.html
│ │ │ ├── mmc.html
│ │ │ ├── n_step.html
│ │ │ ├── naf.html
│ │ │ ├── nec.html
│ │ │ ├── pal.html
│ │ │ ├── qr_dqn.html
│ │ │ └── rainbow.html
│ ├── architectures
│ │ └── index.html
│ ├── core_types.html
│ ├── data_stores
│ │ └── index.html
│ ├── environments
│ │ └── index.html
│ ├── exploration_policies
│ │ └── index.html
│ ├── filters
│ │ ├── index.html
│ │ ├── input_filters.html
│ │ └── output_filters.html
│ ├── memories
│ │ └── index.html
│ ├── memory_backends
│ │ └── index.html
│ ├── orchestrators
│ │ └── index.html
│ └── spaces.html
├── contributing
│ ├── add_agent.html
│ └── add_env.html
├── dashboard.html
├── design
│ ├── control_flow.html
│ ├── horizontal_scaling.html
│ └── network.html
├── dist_usage.html
├── features
│ ├── algorithms.html
│ ├── batch_rl.html
│ ├── benchmarks.html
│ ├── environments.html
│ └── index.html
├── genindex.html
├── index.html
├── objects.inv
├── search.html
├── searchindex.js
├── selecting_an_algorithm.html
├── test.html
└── usage.html
├── docs_raw
├── Makefile
├── README.md
├── __init__.py
├── build_docs.sh
├── make.bat
└── source
│ ├── __init__.py
│ ├── _static
│ ├── css
│ │ └── custom.css
│ └── img
│ │ ├── act.png
│ │ ├── algorithms.png
│ │ ├── attention_discretization.png
│ │ ├── bollinger_bands.png
│ │ ├── box_discretization.png
│ │ ├── box_masking.png
│ │ ├── compare_by_num_episodes.png
│ │ ├── compare_by_time.png
│ │ ├── dark_logo.png
│ │ ├── design.png
│ │ ├── design_imgs
│ │ ├── ac.png
│ │ ├── acer.png
│ │ ├── bs_dqn.png
│ │ ├── cil.png
│ │ ├── ddpg.png
│ │ ├── dfp.png
│ │ ├── distributional_dqn.png
│ │ ├── dqn.png
│ │ ├── dueling_dqn.png
│ │ ├── naf.png
│ │ ├── nec.png
│ │ ├── pg.png
│ │ ├── ppo.png
│ │ ├── qr_dqn.png
│ │ ├── rainbow.png
│ │ ├── sac.png
│ │ ├── td3.png
│ │ └── wolpertinger.png
│ │ ├── diagrams.xml
│ │ ├── distributed.png
│ │ ├── filters.png
│ │ ├── full_discrete_action_space_map.png
│ │ ├── graph.png
│ │ ├── horizontal-scale-out.png
│ │ ├── improve.png
│ │ ├── level.png
│ │ ├── linear_box_to_box_map.png
│ │ ├── network.png
│ │ ├── observe.png
│ │ ├── output_filters.xml
│ │ ├── partial_discrete_action_space_map.png
│ │ ├── separate_signals.png
│ │ ├── train.png
│ │ └── updating_dynamically.gif
│ ├── _templates
│ └── layout.html
│ ├── algorithms.xml
│ ├── components
│ ├── additional_parameters.rst
│ ├── agents
│ │ ├── imitation
│ │ │ ├── bc.rst
│ │ │ └── cil.rst
│ │ ├── index.rst
│ │ ├── other
│ │ │ └── dfp.rst
│ │ ├── policy_optimization
│ │ │ ├── ac.rst
│ │ │ ├── acer.rst
│ │ │ ├── cppo.rst
│ │ │ ├── ddpg.rst
│ │ │ ├── hac.rst
│ │ │ ├── pg.rst
│ │ │ ├── ppo.rst
│ │ │ ├── sac.rst
│ │ │ ├── td3.rst
│ │ │ └── wolpertinger.rst
│ │ └── value_optimization
│ │ │ ├── bs_dqn.rst
│ │ │ ├── categorical_dqn.rst
│ │ │ ├── double_dqn.rst
│ │ │ ├── dqn.rst
│ │ │ ├── dueling_dqn.rst
│ │ │ ├── mmc.rst
│ │ │ ├── n_step.rst
│ │ │ ├── naf.rst
│ │ │ ├── nec.rst
│ │ │ ├── pal.rst
│ │ │ ├── qr_dqn.rst
│ │ │ └── rainbow.rst
│ ├── architectures
│ │ └── index.rst
│ ├── core_types.rst
│ ├── data_stores
│ │ └── index.rst
│ ├── environments
│ │ └── index.rst
│ ├── exploration_policies
│ │ └── index.rst
│ ├── filters
│ │ ├── index.rst
│ │ ├── input_filters.rst
│ │ └── output_filters.rst
│ ├── memories
│ │ └── index.rst
│ ├── memory_backends
│ │ └── index.rst
│ ├── orchestrators
│ │ └── index.rst
│ └── spaces.rst
│ ├── conf.py
│ ├── contributing
│ ├── add_agent.rst
│ └── add_env.rst
│ ├── dashboard.rst
│ ├── design
│ ├── control_flow.rst
│ ├── horizontal_scaling.rst
│ └── network.rst
│ ├── diagrams.xml
│ ├── dist_usage.rst
│ ├── features
│ ├── algorithms.rst
│ ├── batch_rl.rst
│ ├── benchmarks.rst
│ ├── environments.rst
│ └── index.rst
│ ├── index.rst
│ ├── selecting_an_algorithm.rst
│ ├── test.rst
│ └── usage.rst
├── img
├── ant.gif
├── carla.gif
├── coach_logo.png
├── dashboard.gif
├── dashboard.png
├── doom_deathmatch.gif
├── doom_health.gif
├── fetch_slide.gif
├── minitaur.gif
├── montezuma.gif
├── pendulum.gif
└── starcraft.gif
├── requirements.txt
├── rl_coach
├── __init__.py
├── agents
│ ├── __init__.py
│ ├── acer_agent.py
│ ├── actor_critic_agent.py
│ ├── agent.py
│ ├── agent_interface.py
│ ├── bc_agent.py
│ ├── bootstrapped_dqn_agent.py
│ ├── categorical_dqn_agent.py
│ ├── cil_agent.py
│ ├── clipped_ppo_agent.py
│ ├── composite_agent.py
│ ├── ddpg_agent.py
│ ├── ddqn_agent.py
│ ├── ddqn_bcq_agent.py
│ ├── dfp_agent.py
│ ├── dqn_agent.py
│ ├── hac_ddpg_agent.py
│ ├── human_agent.py
│ ├── imitation_agent.py
│ ├── mmc_agent.py
│ ├── n_step_q_agent.py
│ ├── naf_agent.py
│ ├── nec_agent.py
│ ├── pal_agent.py
│ ├── policy_gradients_agent.py
│ ├── policy_optimization_agent.py
│ ├── ppo_agent.py
│ ├── qr_dqn_agent.py
│ ├── rainbow_dqn_agent.py
│ ├── soft_actor_critic_agent.py
│ ├── td3_agent.py
│ ├── td3_exp_agent.py
│ ├── value_optimization_agent.py
│ └── wolpertinger_agent.py
├── architectures
│ ├── __init__.py
│ ├── architecture.py
│ ├── embedder_parameters.py
│ ├── head_parameters.py
│ ├── layers.py
│ ├── middleware_parameters.py
│ ├── mxnet_components
│ │ ├── __init__.py
│ │ ├── architecture.py
│ │ ├── embedders
│ │ │ ├── __init__.py
│ │ │ ├── embedder.py
│ │ │ ├── image_embedder.py
│ │ │ ├── tensor_embedder.py
│ │ │ └── vector_embedder.py
│ │ ├── general_network.py
│ │ ├── heads
│ │ │ ├── __init__.py
│ │ │ ├── head.py
│ │ │ ├── ppo_head.py
│ │ │ ├── ppo_v_head.py
│ │ │ ├── q_head.py
│ │ │ └── v_head.py
│ │ ├── layers.py
│ │ ├── middlewares
│ │ │ ├── __init__.py
│ │ │ ├── fc_middleware.py
│ │ │ ├── lstm_middleware.py
│ │ │ └── middleware.py
│ │ ├── savers.py
│ │ └── utils.py
│ ├── network_wrapper.py
│ └── tensorflow_components
│ │ ├── __init__.py
│ │ ├── architecture.py
│ │ ├── distributed_tf_utils.py
│ │ ├── embedders
│ │ ├── __init__.py
│ │ ├── embedder.py
│ │ ├── image_embedder.py
│ │ ├── tensor_embedder.py
│ │ └── vector_embedder.py
│ │ ├── general_network.py
│ │ ├── heads
│ │ ├── RND_head.py
│ │ ├── __init__.py
│ │ ├── acer_policy_head.py
│ │ ├── categorical_q_head.py
│ │ ├── cil_head.py
│ │ ├── classification_head.py
│ │ ├── ddpg_actor_head.py
│ │ ├── ddpg_v_head.py
│ │ ├── dnd_q_head.py
│ │ ├── dueling_q_head.py
│ │ ├── head.py
│ │ ├── measurements_prediction_head.py
│ │ ├── naf_head.py
│ │ ├── policy_head.py
│ │ ├── ppo_head.py
│ │ ├── ppo_v_head.py
│ │ ├── q_head.py
│ │ ├── quantile_regression_q_head.py
│ │ ├── rainbow_q_head.py
│ │ ├── sac_head.py
│ │ ├── sac_q_head.py
│ │ ├── td3_v_head.py
│ │ ├── v_head.py
│ │ └── wolpertinger_actor_head.py
│ │ ├── layers.py
│ │ ├── middlewares
│ │ ├── __init__.py
│ │ ├── fc_middleware.py
│ │ ├── lstm_middleware.py
│ │ └── middleware.py
│ │ ├── savers.py
│ │ ├── shared_variables.py
│ │ └── utils.py
├── base_parameters.py
├── checkpoint.py
├── coach.py
├── core_types.py
├── dashboard.py
├── dashboard_components
│ ├── __init__.py
│ ├── boards.py
│ ├── episodic_board.py
│ ├── experiment_board.py
│ ├── globals.py
│ ├── landing_page.py
│ ├── signals.py
│ ├── signals_file.py
│ ├── signals_file_base.py
│ ├── signals_files_group.py
│ └── spinner.css
├── data_stores
│ ├── __init__.py
│ ├── checkpoint_data_store.py
│ ├── data_store.py
│ ├── data_store_impl.py
│ ├── nfs_data_store.py
│ ├── redis_data_store.py
│ └── s3_data_store.py
├── debug_utils.py
├── environments
│ ├── CarlaSettings.ini
│ ├── README.md
│ ├── __init__.py
│ ├── carla_environment.py
│ ├── control_suite_environment.py
│ ├── doom
│ │ ├── D2_navigation.cfg
│ │ ├── D2_navigation.wad
│ │ ├── D3_battle.cfg
│ │ └── D3_battle.wad
│ ├── doom_environment.py
│ ├── environment.py
│ ├── environment_interface.py
│ ├── gym_environment.py
│ ├── mujoco
│ │ ├── __init__.py
│ │ ├── common
│ │ │ ├── __init__.py
│ │ │ ├── materials.xml
│ │ │ ├── skybox.xml
│ │ │ └── visual.xml
│ │ ├── pendulum_with_goals.py
│ │ └── pendulum_with_goals.xml
│ ├── robosuite
│ │ ├── cube_exp.py
│ │ └── osc_pose.json
│ ├── robosuite_environment.py
│ ├── starcraft2_environment.py
│ └── toy_problems
│ │ ├── __init__.py
│ │ ├── bit_flip.py
│ │ └── exploration_chain.py
├── exploration_policies
│ ├── README.md
│ ├── __init__.py
│ ├── additive_noise.py
│ ├── boltzmann.py
│ ├── bootstrapped.py
│ ├── categorical.py
│ ├── continuous_entropy.py
│ ├── e_greedy.py
│ ├── exploration_policy.py
│ ├── greedy.py
│ ├── ou_process.py
│ ├── parameter_noise.py
│ ├── truncated_normal.py
│ └── ucb.py
├── filters
│ ├── README.md
│ ├── __init__.py
│ ├── action
│ │ ├── __init__.py
│ │ ├── action_filter.py
│ │ ├── attention_discretization.py
│ │ ├── box_discretization.py
│ │ ├── box_masking.py
│ │ ├── full_discrete_action_space_map.py
│ │ ├── linear_box_to_box_map.py
│ │ └── partial_discrete_action_space_map.py
│ ├── filter.py
│ ├── observation
│ │ ├── __init__.py
│ │ ├── observation_clipping_filter.py
│ │ ├── observation_crop_filter.py
│ │ ├── observation_filter.py
│ │ ├── observation_move_axis_filter.py
│ │ ├── observation_normalization_filter.py
│ │ ├── observation_reduction_by_sub_parts_name_filter.py
│ │ ├── observation_rescale_size_by_factor_filter.py
│ │ ├── observation_rescale_to_size_filter.py
│ │ ├── observation_rgb_to_y_filter.py
│ │ ├── observation_squeeze_filter.py
│ │ ├── observation_stacking_filter.py
│ │ └── observation_to_uint8_filter.py
│ └── reward
│ │ ├── __init__.py
│ │ ├── reward_clipping_filter.py
│ │ ├── reward_ewma_normalization_filter.py
│ │ ├── reward_filter.py
│ │ ├── reward_normalization_filter.py
│ │ └── reward_rescale_filter.py
├── graph_managers
│ ├── README.md
│ ├── __init__.py
│ ├── basic_rl_graph_manager.py
│ ├── batch_rl_graph_manager.py
│ ├── graph_manager.py
│ ├── hac_graph_manager.py
│ └── hrl_graph_manager.py
├── level_manager.py
├── logger.py
├── memories
│ ├── __init__.py
│ ├── backend
│ │ ├── __init__.py
│ │ ├── memory.py
│ │ ├── memory_impl.py
│ │ └── redis.py
│ ├── episodic
│ │ ├── __init__.py
│ │ ├── episodic_experience_replay.py
│ │ ├── episodic_hindsight_experience_replay.py
│ │ ├── episodic_hrl_hindsight_experience_replay.py
│ │ └── single_episode_buffer.py
│ ├── memory.py
│ └── non_episodic
│ │ ├── __init__.py
│ │ ├── balanced_experience_replay.py
│ │ ├── differentiable_neural_dictionary.py
│ │ ├── experience_replay.py
│ │ ├── prioritized_experience_replay.py
│ │ └── transition_collection.py
├── off_policy_evaluators
│ ├── __init__.py
│ ├── bandits
│ │ ├── __init__.py
│ │ └── doubly_robust.py
│ ├── ope_manager.py
│ └── rl
│ │ ├── __init__.py
│ │ ├── sequential_doubly_robust.py
│ │ └── weighted_importance_sampling.py
├── orchestrators
│ ├── __init__.py
│ ├── deploy.py
│ └── kubernetes_orchestrator.py
├── plot_atari.py
├── presets
│ ├── Acrobot_DDQN_BCQ_BatchRL.py
│ ├── Atari_A3C.py
│ ├── Atari_A3C_LSTM.py
│ ├── Atari_ACER.py
│ ├── Atari_Bootstrapped_DQN.py
│ ├── Atari_C51.py
│ ├── Atari_DDQN.py
│ ├── Atari_DDQN_with_PER.py
│ ├── Atari_DQN.py
│ ├── Atari_DQN_with_PER.py
│ ├── Atari_Dueling_DDQN.py
│ ├── Atari_Dueling_DDQN_with_PER_OpenAI.py
│ ├── Atari_NEC.py
│ ├── Atari_NStepQ.py
│ ├── Atari_QR_DQN.py
│ ├── Atari_Rainbow.py
│ ├── Atari_UCB_with_Q_Ensembles.py
│ ├── BitFlip_DQN.py
│ ├── BitFlip_DQN_HER.py
│ ├── CARLA_3_Cameras_DDPG.py
│ ├── CARLA_CIL.py
│ ├── CARLA_DDPG.py
│ ├── CARLA_Dueling_DDQN.py
│ ├── CartPole_A3C.py
│ ├── CartPole_ACER.py
│ ├── CartPole_ClippedPPO.py
│ ├── CartPole_DDQN_BCQ_BatchRL.py
│ ├── CartPole_DDQN_BatchRL.py
│ ├── CartPole_DFP.py
│ ├── CartPole_DQN.py
│ ├── CartPole_Dueling_DDQN.py
│ ├── CartPole_NEC.py
│ ├── CartPole_NStepQ.py
│ ├── CartPole_PAL.py
│ ├── CartPole_PG.py
│ ├── CartPole_QR_DQN.py
│ ├── CartPole_Rainbow.py
│ ├── ControlSuite_DDPG.py
│ ├── Doom_Basic_A3C.py
│ ├── Doom_Basic_ACER.py
│ ├── Doom_Basic_BC.py
│ ├── Doom_Basic_DFP.py
│ ├── Doom_Basic_DQN.py
│ ├── Doom_Basic_Dueling_DDQN.py
│ ├── Doom_Battle_DFP.py
│ ├── Doom_Health_DFP.py
│ ├── Doom_Health_MMC.py
│ ├── Doom_Health_Supreme_DFP.py
│ ├── ExplorationChain_Bootstrapped_DQN.py
│ ├── ExplorationChain_Dueling_DDQN.py
│ ├── ExplorationChain_UCB_Q_ensembles.py
│ ├── Fetch_DDPG_HER_baselines.py
│ ├── InvertedPendulum_PG.py
│ ├── MontezumaRevenge_BC.py
│ ├── Mujoco_A3C.py
│ ├── Mujoco_A3C_LSTM.py
│ ├── Mujoco_ClippedPPO.py
│ ├── Mujoco_DDPG.py
│ ├── Mujoco_NAF.py
│ ├── Mujoco_PPO.py
│ ├── Mujoco_SAC.py
│ ├── Mujoco_TD3.py
│ ├── Mujoco_Wolpertinger.py
│ ├── Pendulum_HAC.py
│ ├── README.md
│ ├── RoboSuite_CubeExp_Random.py
│ ├── RoboSuite_CubeExp_TD3_Goal_Based.py
│ ├── RoboSuite_CubeExp_TD3_Intrinsic_Reward.py
│ ├── Starcraft_CollectMinerals_A3C.py
│ ├── Starcraft_CollectMinerals_Dueling_DDQN.py
│ └── __init__.py
├── renderer.py
├── rollout_worker.py
├── run_multiple_seeds.py
├── saver.py
├── schedules.py
├── spaces.py
├── tests
│ ├── README.md
│ ├── __init__.py
│ ├── agents
│ │ ├── __init__.py
│ │ └── test_agent_external_communication.py
│ ├── architectures
│ │ ├── __init__.py
│ │ ├── mxnet_components
│ │ │ ├── __init__.py
│ │ │ ├── embedders
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_image_embedder.py
│ │ │ │ └── test_vector_embedder.py
│ │ │ ├── heads
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_head.py
│ │ │ │ ├── test_ppo_head.py
│ │ │ │ ├── test_ppo_v_head.py
│ │ │ │ ├── test_q_head.py
│ │ │ │ └── test_v_head.py
│ │ │ ├── middlewares
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_fc_middleware.py
│ │ │ │ └── test_lstm_middleware.py
│ │ │ └── test_utils.py
│ │ └── tensorflow_components
│ │ │ ├── __init__.py
│ │ │ └── embedders
│ │ │ ├── __init__.py
│ │ │ ├── test_identity_embedder.py
│ │ │ ├── test_image_embedder.py
│ │ │ └── test_vector_embedder.py
│ ├── conftest.py
│ ├── environments
│ │ ├── __init__.py
│ │ └── test_gym_environment.py
│ ├── exploration_policies
│ │ ├── __init__.py
│ │ ├── test_additive_noise.py
│ │ ├── test_e_greedy.py
│ │ ├── test_greedy.py
│ │ └── test_ou_process.py
│ ├── filters
│ │ ├── __init__.py
│ │ ├── action
│ │ │ ├── __init__.py
│ │ │ ├── test_attention_discretization.py
│ │ │ ├── test_box_discretization.py
│ │ │ ├── test_box_masking.py
│ │ │ └── test_linear_box_to_box_map.py
│ │ ├── observation
│ │ │ ├── __init__.py
│ │ │ ├── test_observation_crop_filter.py
│ │ │ ├── test_observation_reduction_by_sub_parts_name_filter.py
│ │ │ ├── test_observation_rescale_size_by_factor_filter.py
│ │ │ ├── test_observation_rescale_to_size_filter.py
│ │ │ ├── test_observation_rgb_to_y_filter.py
│ │ │ ├── test_observation_squeeze_filter.py
│ │ │ ├── test_observation_stacking_filter.py
│ │ │ └── test_observation_to_uint8_filter.py
│ │ ├── reward
│ │ │ ├── __init__.py
│ │ │ ├── test_reward_clipping_filter.py
│ │ │ └── test_reward_rescale_filter.py
│ │ └── test_filters_stacking.py
│ ├── graph_managers
│ │ ├── __init__.py
│ │ ├── test_basic_rl_graph_manager.py
│ │ └── test_graph_manager.py
│ ├── memories
│ │ ├── __init__.py
│ │ ├── test_differential_neural_dictionary.py
│ │ ├── test_hindsight_experience_replay.py
│ │ ├── test_prioritized_experience_replay.py
│ │ └── test_single_episode_buffer.py
│ ├── presets
│ │ ├── __init__.py
│ │ └── test_presets.py
│ ├── pytest.ini
│ ├── test_checkpoint.py
│ ├── test_coach_args.py
│ ├── test_core_types.py
│ ├── test_dist_coach.py
│ ├── test_eks.py
│ ├── test_global_variable_saver.py
│ ├── test_golden.py
│ ├── test_saver.py
│ ├── test_schedules.py
│ ├── test_spaces.py
│ ├── trace_tests.py
│ └── utils
│ │ ├── __init__.py
│ │ ├── args_utils.py
│ │ ├── definitions.py
│ │ ├── presets_utils.py
│ │ └── test_utils.py
├── training_worker.py
├── utilities
│ ├── __init__.py
│ ├── carla_dataset_to_replay_buffer.py
│ └── shared_running_stats.py
└── utils.py
├── setup.py
└── tutorials
├── 0. Quick Start Guide.ipynb
├── 1. Implementing an Algorithm.ipynb
├── 2. Adding an Environment.ipynb
├── 3. Implementing a Hierarchical RL Graph.ipynb
├── 4. Batch Reinforcement Learning.ipynb
├── 5. Goal-Based Data Collection.ipynb
├── Resources
├── acrobot_dataset.csv
├── exploration.py
└── img
│ ├── dr.png
│ ├── model_selection.png
│ └── wis.png
└── python_invocation_example.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | experiments
3 | *.pyc
4 | checkpoints
5 | _vizdoom.ini
6 | *.*~
7 | MUJOCO_LOG.TXT
8 | test_log.txt
9 | .test
10 | tf_logs
11 | bullet3
12 | roboschool
13 | *.csv
14 | *.doc
15 | *.orig
16 | docs/site
17 | coach_env
18 | venv
19 | build
20 | rl_coach.egg*
21 | rl_coach_slim.egg*
22 | contrib
23 | test_log_*
24 | dist
25 | .DS_Store
26 | datasets
27 | .cache
28 | .pytest_cache
29 | core
30 | trace_test*
31 | *.swp
32 | *.swo
33 | .cache/
34 | *.pyc
35 | coachenv
36 |
--------------------------------------------------------------------------------
/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/.nojekyll
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include rl_coach/dashboard_components/*.css
2 | include rl_coach/environments/doom/*.cfg
3 | include rl_coach/environments/doom/*.wad
4 | include rl_coach/environments/mujoco/common/*.xml
5 | include rl_coach/environments/mujoco/*.xml
6 | include rl_coach/environments/*.ini
7 | include rl_coach/tests/*.ini
8 | include requirements.txt
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/__init__.py
--------------------------------------------------------------------------------
/benchmarks/a3c/ant_a3c_16_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/a3c/ant_a3c_16_workers.png
--------------------------------------------------------------------------------
/benchmarks/a3c/half_cheetah_a3c_16_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/a3c/half_cheetah_a3c_16_workers.png
--------------------------------------------------------------------------------
/benchmarks/a3c/hopper_a3c_16_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/a3c/hopper_a3c_16_workers.png
--------------------------------------------------------------------------------
/benchmarks/a3c/inverted_pendulum_a3c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/a3c/inverted_pendulum_a3c.png
--------------------------------------------------------------------------------
/benchmarks/a3c/space_invaders_a3c_16_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/a3c/space_invaders_a3c_16_workers.png
--------------------------------------------------------------------------------
/benchmarks/a3c/walker2d_a3c_16_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/a3c/walker2d_a3c_16_workers.png
--------------------------------------------------------------------------------
/benchmarks/acer/README.md:
--------------------------------------------------------------------------------
1 | # ACER
2 |
3 | Each experiment uses 3 seeds.
4 | The parameters used for ACER are the same parameters as described in the [original paper](https://arxiv.org/abs/1611.01224), except for the optimizer (changed to Adam) and the learning rate (1e-4).
5 |
6 | ### Breakout ACER - 16 workers
7 |
8 | ```bash
9 | coach -p Atari_ACER -lvl breakout -n 16
10 | ```
11 |
12 |
13 |
14 | ### Space Invaders ACER - 16 workers
15 |
16 | ```bash
17 | coach -p Atari_ACER -lvl space_invaders -n 16
18 | ```
19 |
20 |
21 |
22 | ### Pong ACER - 16 workers
23 |
24 | ```bash
25 | coach -p Atari_ACER -lvl pong -n 16
26 | ```
27 |
28 |
29 |
--------------------------------------------------------------------------------
/benchmarks/acer/breakout_acer_16_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/acer/breakout_acer_16_workers.png
--------------------------------------------------------------------------------
/benchmarks/acer/pong_acer_16_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/acer/pong_acer_16_workers.png
--------------------------------------------------------------------------------
/benchmarks/acer/space_invaders_acer_16_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/acer/space_invaders_acer_16_workers.png
--------------------------------------------------------------------------------
/benchmarks/bootstrapped_dqn/README.md:
--------------------------------------------------------------------------------
1 | # Bootstrapped DQN
2 |
3 | Each experiment uses 3 seeds.
4 | The parameters used for Bootstrapped DQN are the same parameters as described in the [original paper](https://arxiv.org/abs/1602.04621).
5 |
6 | ### Breakout Bootstrapped DQN - single worker
7 |
8 | ```bash
9 | coach -p Atari_Bootstrapped_DQN -lvl breakout
10 | ```
11 |
12 |
13 |
14 |
15 | ### Pong Bootstrapped DQN - single worker
16 |
17 | ```bash
18 | coach -p Atari_Bootstrapped_DQN -lvl pong
19 | ```
20 |
21 |
22 |
23 |
24 | ### Space Invaders Bootstrapped DQN - single worker
25 |
26 | ```bash
27 | coach -p Atari_Bootstrapped_DQN -lvl space_invaders
28 | ```
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/benchmarks/bootstrapped_dqn/breakout_bootstrapped_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/bootstrapped_dqn/breakout_bootstrapped_dqn.png
--------------------------------------------------------------------------------
/benchmarks/bootstrapped_dqn/pong_bootstrapped_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/bootstrapped_dqn/pong_bootstrapped_dqn.png
--------------------------------------------------------------------------------
/benchmarks/bootstrapped_dqn/space_invaders_bootstrapped_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/bootstrapped_dqn/space_invaders_bootstrapped_dqn.png
--------------------------------------------------------------------------------
/benchmarks/clipped_ppo/ant_clipped_ppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/clipped_ppo/ant_clipped_ppo.png
--------------------------------------------------------------------------------
/benchmarks/clipped_ppo/half_cheetah_clipped_ppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/clipped_ppo/half_cheetah_clipped_ppo.png
--------------------------------------------------------------------------------
/benchmarks/clipped_ppo/hopper_clipped_ppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/clipped_ppo/hopper_clipped_ppo.png
--------------------------------------------------------------------------------
/benchmarks/clipped_ppo/humanoid_clipped_ppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/clipped_ppo/humanoid_clipped_ppo.png
--------------------------------------------------------------------------------
/benchmarks/clipped_ppo/inverted_double_pendulum_clipped_ppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/clipped_ppo/inverted_double_pendulum_clipped_ppo.png
--------------------------------------------------------------------------------
/benchmarks/clipped_ppo/inverted_pendulum_clipped_ppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/clipped_ppo/inverted_pendulum_clipped_ppo.png
--------------------------------------------------------------------------------
/benchmarks/clipped_ppo/reacher_clipped_ppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/clipped_ppo/reacher_clipped_ppo.png
--------------------------------------------------------------------------------
/benchmarks/clipped_ppo/swimmer_clipped_ppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/clipped_ppo/swimmer_clipped_ppo.png
--------------------------------------------------------------------------------
/benchmarks/clipped_ppo/walker2d_clipped_ppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/clipped_ppo/walker2d_clipped_ppo.png
--------------------------------------------------------------------------------
/benchmarks/ddpg/ant_ddpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg/ant_ddpg.png
--------------------------------------------------------------------------------
/benchmarks/ddpg/half_cheetah_ddpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg/half_cheetah_ddpg.png
--------------------------------------------------------------------------------
/benchmarks/ddpg/hopper_ddpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg/hopper_ddpg.png
--------------------------------------------------------------------------------
/benchmarks/ddpg/humanoid_ddpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg/humanoid_ddpg.png
--------------------------------------------------------------------------------
/benchmarks/ddpg/inverted_double_pendulum_ddpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg/inverted_double_pendulum_ddpg.png
--------------------------------------------------------------------------------
/benchmarks/ddpg/inverted_pendulum_ddpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg/inverted_pendulum_ddpg.png
--------------------------------------------------------------------------------
/benchmarks/ddpg/reacher_ddpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg/reacher_ddpg.png
--------------------------------------------------------------------------------
/benchmarks/ddpg/swimmer_ddpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg/swimmer_ddpg.png
--------------------------------------------------------------------------------
/benchmarks/ddpg/walker2d_ddpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg/walker2d_ddpg.png
--------------------------------------------------------------------------------
/benchmarks/ddpg_her/README.md:
--------------------------------------------------------------------------------
1 | # DDPG with Hindsight Experience Replay
2 |
3 | Each experiment uses 3 seeds.
4 | The parameters used for DDPG HER are the same parameters as described in the [following paper](https://arxiv.org/abs/1802.09464).
5 |
6 | ### Fetch Reach DDPG HER - single worker
7 |
8 | ```bash
9 | coach -p Fetch_DDPG_HER_baselines -lvl reach
10 | ```
11 |
12 |
13 |
14 |
15 | ### Fetch Push DDPG HER - 8 workers
16 |
17 | ```bash
18 | coach -p Fetch_DDPG_HER_baselines -lvl push -n 8
19 | ```
20 |
21 |
22 |
23 |
24 | ### Fetch Slide DDPG HER - 8 workers
25 |
26 | ```bash
27 | coach -p Fetch_DDPG_HER_baselines -lvl slide -n 8
28 | ```
29 |
30 |
31 |
32 |
33 | ### Fetch Pick And Place DDPG HER - 8 workers
34 |
35 | ```bash
36 | coach -p Fetch_DDPG_HER_baselines -lvl pick_and_place -n 8
37 | ```
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/benchmarks/ddpg_her/fetch_ddpg_her_pick_and_place_8_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg_her/fetch_ddpg_her_pick_and_place_8_workers.png
--------------------------------------------------------------------------------
/benchmarks/ddpg_her/fetch_ddpg_her_push_8_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg_her/fetch_ddpg_her_push_8_workers.png
--------------------------------------------------------------------------------
/benchmarks/ddpg_her/fetch_ddpg_her_reach_1_worker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg_her/fetch_ddpg_her_reach_1_worker.png
--------------------------------------------------------------------------------
/benchmarks/ddpg_her/fetch_ddpg_her_slide_8_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/ddpg_her/fetch_ddpg_her_slide_8_workers.png
--------------------------------------------------------------------------------
/benchmarks/dfp/README.md:
--------------------------------------------------------------------------------
1 | # DFP
2 |
3 | Each experiment uses 3 seeds.
4 | The parameters used for DFP are the same parameters as described in the [original paper](https://arxiv.org/abs/1611.01779).
5 |
6 | ### Doom Basic DFP - 8 workers
7 |
8 | ```bash
9 | coach -p Doom_Basic_DFP -n 8
10 | ```
11 |
12 |
13 |
14 |
15 | ### Doom Health (D1: Basic) DFP - 8 workers
16 |
17 | ```bash
18 | coach -p Doom_Health_DFP -n 8
19 | ```
20 |
21 |
22 |
23 |
24 |
25 | ### Doom Health Supreme (D2: Navigation) DFP - 8 workers
26 |
27 | ```bash
28 | coach -p Doom_Health_Supreme_DFP -n 8
29 | ```
30 |
31 |
32 |
--------------------------------------------------------------------------------
/benchmarks/dfp/doom_basic_dfp_8_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dfp/doom_basic_dfp_8_workers.png
--------------------------------------------------------------------------------
/benchmarks/dfp/doom_health_dfp_8_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dfp/doom_health_dfp_8_workers.png
--------------------------------------------------------------------------------
/benchmarks/dfp/doom_health_supreme_dfp_8_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dfp/doom_health_supreme_dfp_8_workers.png
--------------------------------------------------------------------------------
/benchmarks/dqn/README.md:
--------------------------------------------------------------------------------
1 | # DQN
2 |
3 | Each experiment uses 3 seeds.
4 | The parameters used for DQN are the same parameters as described in the [original paper](https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf), except for the optimizer (changed to Adam) and the learning rate (1e-4).
5 |
6 | ### Breakout DQN - single worker
7 |
8 | ```bash
9 | coach -p Atari_DQN -lvl breakout
10 | ```
11 |
12 |
13 |
14 | ### Pong DQN - single worker
15 |
16 | ```bash
17 | coach -p Atari_DQN -lvl pong
18 | ```
19 |
20 |
21 |
22 | ### Space Invaders DQN - single worker
23 |
24 | ```bash
25 | coach -p Atari_DQN -lvl space_invaders
26 | ```
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
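
Note: the "changed to Adam, learning rate 1e-4" deviation mentioned above (and in the ACER README) is configured in the preset files under rl_coach/presets rather than on the command line. Below is a minimal sketch of how such an override typically looks in a preset, assuming the `DQNAgentParameters` class and the `network_wrappers['main']` attribute names used by the bundled presets; the exact values set in `Atari_DQN.py` may differ.

```python
# Illustrative preset-style override (a sketch, not the shipped Atari_DQN preset).
from rl_coach.agents.dqn_agent import DQNAgentParameters

agent_params = DQNAgentParameters()

# Deviate from the paper's defaults, mirroring the README note:
# switch the optimizer to Adam and lower the learning rate to 1e-4.
agent_params.network_wrappers['main'].optimizer_type = 'Adam'
agent_params.network_wrappers['main'].learning_rate = 0.0001
```
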
/benchmarks/dqn/breakout_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dqn/breakout_dqn.png
--------------------------------------------------------------------------------
/benchmarks/dqn/pong_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dqn/pong_dqn.png
--------------------------------------------------------------------------------
/benchmarks/dqn/space_invaders_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dqn/space_invaders_dqn.png
--------------------------------------------------------------------------------
/benchmarks/dueling_ddqn/README.md:
--------------------------------------------------------------------------------
1 | # Dueling DDQN
2 |
3 | Each experiment uses 3 seeds and is trained for 10k environment steps.
4 | The parameters used for Dueling DDQN are the same parameters as described in the [original paper](https://arxiv.org/abs/1511.06581).
5 |
6 | ### Pong Dueling DDQN - single worker
7 |
8 | ```bash
9 | coach -p Atari_Dueling_DDQN -lvl pong
10 | ```
11 |
12 |
13 |
14 |
15 | ### Breakout Dueling DDQN - single worker
16 |
17 | ```bash
18 | coach -p Atari_Dueling_DDQN -lvl breakout
19 | ```
20 |
21 |
22 |
23 |
24 | ### Space Invaders Dueling DDQN - single worker
25 |
26 | ```bash
27 | coach -p Atari_Dueling_DDQN -lvl space_invaders
28 | ```
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/benchmarks/dueling_ddqn/breakout_dueling_ddqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dueling_ddqn/breakout_dueling_ddqn.png
--------------------------------------------------------------------------------
/benchmarks/dueling_ddqn/pong_dueling_ddqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dueling_ddqn/pong_dueling_ddqn.png
--------------------------------------------------------------------------------
/benchmarks/dueling_ddqn/space_invaders_dueling_ddqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dueling_ddqn/space_invaders_dueling_ddqn.png
--------------------------------------------------------------------------------
/benchmarks/dueling_ddqn_with_per/README.md:
--------------------------------------------------------------------------------
1 | # Dueling DDQN with Prioritized Experience Replay
2 |
3 | Each experiment uses 3 seeds and is trained for 10k environment steps.
4 | The parameters used for Dueling DDQN with PER are the same parameters as described in the [following paper](https://arxiv.org/abs/1511.05952).
5 |
6 | ### Breakout Dueling DDQN with PER - single worker
7 |
8 | ```bash
9 | coach -p Atari_Dueling_DDQN_with_PER_OpenAI -lvl breakout
10 | ```
11 |
12 |
13 |
14 |
15 | ### Pong Dueling DDQN with PER - single worker
16 |
17 | ```bash
18 | coach -p Atari_Dueling_DDQN_with_PER_OpenAI -lvl pong
19 | ```
20 |
21 |
22 |
23 |
24 | ### Space Invaders Dueling DDQN with PER - single worker
25 |
26 | ```bash
27 | coach -p Atari_Dueling_DDQN_with_PER_OpenAI -lvl space_invaders
28 | ```
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/benchmarks/dueling_ddqn_with_per/breakout_dueling_ddqn_with_per.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dueling_ddqn_with_per/breakout_dueling_ddqn_with_per.png
--------------------------------------------------------------------------------
/benchmarks/dueling_ddqn_with_per/pong_dueling_ddqn_with_per.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dueling_ddqn_with_per/pong_dueling_ddqn_with_per.png
--------------------------------------------------------------------------------
/benchmarks/dueling_ddqn_with_per/space_invaders_dueling_ddqn_with_per.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/dueling_ddqn_with_per/space_invaders_dueling_ddqn_with_per.png
--------------------------------------------------------------------------------
/benchmarks/qr_dqn/README.md:
--------------------------------------------------------------------------------
1 | # Quantile Regression DQN
2 |
3 | Each experiment uses 3 seeds and is trained for 10k environment steps.
4 | The parameters used for QR-DQN are the same parameters as described in the [original paper](https://arxiv.org/abs/1710.10044).
5 |
6 | ### Breakout QR-DQN - single worker
7 |
8 | ```bash
9 | coach -p Atari_QR_DQN -lvl breakout
10 | ```
11 |
12 |
13 |
14 |
15 | ### Pong QR-DQN - single worker
16 |
17 | ```bash
18 | coach -p Atari_QR_DQN -lvl pong
19 | ```
20 |
21 |
22 |
--------------------------------------------------------------------------------
/benchmarks/qr_dqn/breakout_qr_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/qr_dqn/breakout_qr_dqn.png
--------------------------------------------------------------------------------
/benchmarks/qr_dqn/pong_qr_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/qr_dqn/pong_qr_dqn.png
--------------------------------------------------------------------------------
/benchmarks/sac/README.md:
--------------------------------------------------------------------------------
1 | # Soft Actor Critic
2 |
3 | Each experiment uses 3 seeds and is trained for 3M environment steps.
4 | The parameters used for SAC are the same parameters as described in the [original paper](https://arxiv.org/abs/1801.01290).
5 |
6 | ### Inverted Pendulum SAC - single worker
7 |
8 | ```bash
9 | coach -p Mujoco_SAC -lvl inverted_pendulum
10 | ```
11 |
12 |
13 |
14 |
15 | ### Hopper SAC - single worker
16 |
17 | ```bash
18 | coach -p Mujoco_SAC -lvl hopper
19 | ```
20 |
21 |
22 |
23 |
24 | ### Half Cheetah SAC - single worker
25 |
26 | ```bash
27 | coach -p Mujoco_SAC -lvl half_cheetah
28 | ```
29 |
30 |
31 |
32 |
33 | ### Walker 2D SAC - single worker
34 |
35 | ```bash
36 | coach -p Mujoco_SAC -lvl walker2d
37 | ```
38 |
39 |
40 |
41 |
42 | ### Humanoid SAC - single worker
43 |
44 | ```bash
45 | coach -p Mujoco_SAC -lvl humanoid
46 | ```
47 |
48 |
49 |
--------------------------------------------------------------------------------
/benchmarks/sac/half_cheetah_sac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/sac/half_cheetah_sac.png
--------------------------------------------------------------------------------
/benchmarks/sac/hopper_sac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/sac/hopper_sac.png
--------------------------------------------------------------------------------
/benchmarks/sac/humanoid_sac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/sac/humanoid_sac.png
--------------------------------------------------------------------------------
/benchmarks/sac/inverted_pendulum_sac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/sac/inverted_pendulum_sac.png
--------------------------------------------------------------------------------
/benchmarks/sac/walker2d_sac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/sac/walker2d_sac.png
--------------------------------------------------------------------------------
/benchmarks/td3/README.md:
--------------------------------------------------------------------------------
1 | # Twin Delayed DDPG
2 |
3 | Each experiment uses 5 seeds and is trained for 1M environment steps.
4 | The parameters used for TD3 are the same parameters as described in the [original paper](https://arxiv.org/pdf/1802.09477.pdf) and the accompanying [repository](https://github.com/sfujim/TD3).
5 |
6 | ### Ant TD3 - single worker
7 |
8 | ```bash
9 | coach -p Mujoco_TD3 -lvl ant
10 | ```
11 |
12 |
13 |
14 |
15 | ### Hopper TD3 - single worker
16 |
17 | ```bash
18 | coach -p Mujoco_TD3 -lvl hopper
19 | ```
20 |
21 |
22 |
23 |
24 | ### Half Cheetah TD3 - single worker
25 |
26 | ```bash
27 | coach -p Mujoco_TD3 -lvl half_cheetah
28 | ```
29 |
30 |
31 |
32 |
33 | ### Reacher TD3 - single worker
34 |
35 | ```bash
36 | coach -p Mujoco_TD3 -lvl reacher
37 | ```
38 |
39 |
40 |
41 |
42 | ### Walker2D TD3 - single worker
43 |
44 | ```bash
45 | coach -p Mujoco_TD3 -lvl walker2d
46 | ```
47 |
48 |
49 |
--------------------------------------------------------------------------------
/benchmarks/td3/ant.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/td3/ant.png
--------------------------------------------------------------------------------
/benchmarks/td3/half_cheetah.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/td3/half_cheetah.png
--------------------------------------------------------------------------------
/benchmarks/td3/hopper.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/td3/hopper.png
--------------------------------------------------------------------------------
/benchmarks/td3/reacher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/td3/reacher.png
--------------------------------------------------------------------------------
/benchmarks/td3/walker2d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/benchmarks/td3/walker2d.png
--------------------------------------------------------------------------------
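
All of the benchmark commands above go through the `coach` CLI entry point (rl_coach/coach.py). The same presets can also be driven directly from Python; see tutorials/python_invocation_example.py and the Quick Start notebook in the tree above. Below is a minimal sketch, assuming the module-level `graph_manager` object exposed by presets such as `CartPole_DQN` and the `TaskParameters`/`create_graph()`/`improve()` API; treat the argument names here as an assumption, not a verified signature.

```python
# Illustrative sketch: running a bundled preset from Python instead of the CLI.
from rl_coach.base_parameters import TaskParameters
from rl_coach.presets.CartPole_DQN import graph_manager

# Hypothetical output directory for this run's checkpoints and logs.
task_parameters = TaskParameters(experiment_path='./experiments/cartpole_dqn')

graph_manager.create_graph(task_parameters)  # build the agent, environment and networks
graph_manager.improve()                      # run the preset's heatup/train/evaluate schedule
```
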
/dist-coach-config.template:
--------------------------------------------------------------------------------
1 | [coach]
2 | image =
3 | memory_backend = redispubsub
4 | data_store = s3
5 | s3_end_point = s3.amazonaws.com
6 | s3_bucket_name =
7 | s3_creds_file =
8 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM coach-base:master as builder
2 |
3 | # prep some of the more common environments
4 | # Gym (installed with coach)
5 | RUN pip3 install gym[atari]==0.12.5 box2d
6 | # Mujoco
7 | RUN mkdir -p ~/.mujoco \
8 | && wget https://www.roboti.us/download/mjpro150_linux.zip -O mujoco.zip \
9 | && unzip mujoco.zip -d ~/.mujoco \
10 | && rm mujoco.zip
11 | ARG MUJOCO_KEY
12 | ENV MUJOCO_KEY=$MUJOCO_KEY
13 | ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH
14 | RUN echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt
15 | RUN pip3 install mujoco_py==1.50.1.68
16 | # Vizdoom
17 | RUN pip3 install vizdoom==1.1.7
18 |
19 | RUN mkdir /root/src
20 | COPY setup.py /root/src/.
21 | COPY requirements.txt /root/src/.
22 | RUN pip3 install -r /root/src/requirements.txt
23 |
24 | FROM coach-base:master
25 | WORKDIR /root/src
26 | COPY --from=builder /root/.mujoco /root/.mujoco
27 | ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH
28 | COPY --from=builder /root/.cache /root/.cache
29 | COPY setup.py /root/src/.
30 | COPY requirements.txt /root/src/.
31 | COPY README.md /root/src/.
32 | RUN pip3 install gym[atari]==0.12.5 box2d mujoco_py==1.50.1.68 vizdoom==1.1.7 && pip3 install -e .[all] && rm -rf /root/.cache
33 | COPY . /root/src
34 |
--------------------------------------------------------------------------------
/docker/Dockerfile.doom_environment:
--------------------------------------------------------------------------------
1 | FROM coach-base:master as builder
2 |
3 | # prep vizdoom and any of its related requirements.
4 | RUN pip3 install vizdoom==1.1.7
5 |
6 | # add coach source starting with files that could trigger
7 | # re-build if dependencies change.
8 | RUN mkdir /root/src
9 | COPY setup.py /root/src/.
10 | COPY requirements.txt /root/src/.
11 | RUN pip3 install -r /root/src/requirements.txt
12 |
13 | FROM coach-base:master
14 | WORKDIR /root/src
15 | COPY --from=builder /root/.cache /root/.cache
16 | COPY setup.py /root/src/.
17 | COPY requirements.txt /root/src/.
18 | COPY README.md /root/src/.
19 | RUN pip3 install vizdoom==1.1.7 && pip3 install -e .[all] && rm -rf /root/.cache
20 | COPY . /root/src
21 |
--------------------------------------------------------------------------------
/docker/Dockerfile.gym_environment:
--------------------------------------------------------------------------------
1 | FROM coach-base:master as builder
2 |
3 | # prep gym and any of its related requirements.
4 | RUN pip3 install gym[atari,box2d,classic_control]==0.12.5
5 |
6 | # add coach source starting with files that could trigger
7 | # re-build if dependencies change.
8 | RUN mkdir /root/src
9 | COPY setup.py /root/src/.
10 | COPY requirements.txt /root/src/.
11 | RUN pip3 install -r /root/src/requirements.txt
12 |
13 | FROM coach-base:master
14 | WORKDIR /root/src
15 | COPY --from=builder /root/.cache /root/.cache
16 | COPY setup.py /root/src/.
17 | COPY requirements.txt /root/src/.
18 | COPY README.md /root/src/.
19 | RUN pip3 install gym[atari,box2d,classic_control]==0.12.5 && pip3 install -e .[all] && rm -rf /root/.cache
20 | COPY . /root/src
21 |
--------------------------------------------------------------------------------
/docker/Dockerfile.mujoco_environment:
--------------------------------------------------------------------------------
1 | FROM coach-base:master as builder
2 |
3 | # prep mujoco and any of its related requirements.
4 | # Mujoco
5 | RUN mkdir -p ~/.mujoco \
6 | && wget https://www.roboti.us/download/mjpro150_linux.zip -O mujoco.zip \
7 | && unzip -n mujoco.zip -d ~/.mujoco \
8 | && rm mujoco.zip
9 | ARG MUJOCO_KEY
10 | ENV MUJOCO_KEY=$MUJOCO_KEY
11 | ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH
12 | RUN echo $MUJOCO_KEY | base64 --decode > /root/.mujoco/mjkey.txt
13 | RUN pip3 install mujoco_py==1.50.1.68
14 |
15 | # add coach source starting with files that could trigger
16 | # re-build if dependencies change.
17 | RUN mkdir /root/src
18 | COPY setup.py /root/src/.
19 | COPY requirements.txt /root/src/.
20 | RUN pip3 install -r /root/src/requirements.txt
21 |
22 | FROM coach-base:master
23 | WORKDIR /root/src
24 | COPY --from=builder /root/.mujoco /root/.mujoco
25 | ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:$LD_LIBRARY_PATH
26 | COPY --from=builder /root/.cache /root/.cache
27 | COPY setup.py /root/src/.
28 | COPY requirements.txt /root/src/.
29 | COPY README.md /root/src/.
30 | RUN pip3 install mujoco_py==1.50.1.68 && pip3 install -e .[all] && rm -rf /root/.cache
31 | COPY . /root/src
32 |
--------------------------------------------------------------------------------
/docker/Dockerfile.starcraft_environment:
--------------------------------------------------------------------------------
1 | FROM coach-base:master as builder
2 |
3 | # prep pysc2 and any of its related requirements.
4 | RUN wget http://blzdistsc2-a.akamaihd.net/Linux/SC2.3.17.zip -O sc2.zip \
5 | && unzip -P 'iagreetotheeula' -d ~ sc2.zip \
6 | && rm sc2.zip
7 | RUN wget https://github.com/deepmind/pysc2/releases/download/v1.2/mini_games.zip -O mini_games.zip \
8 | && unzip -d ~/StarCraftII/Maps mini_games.zip \
9 | && rm mini_games.zip
10 | RUN pip3 install pysc2
11 |
12 | # add coach source starting with files that could trigger
13 | # re-build if dependencies change.
14 | RUN mkdir /root/src
15 | COPY setup.py /root/src/.
16 | COPY requirements.txt /root/src/.
17 | RUN pip3 install -r /root/src/requirements.txt
18 |
19 | FROM coach-base:master
20 | WORKDIR /root/src
21 | COPY --from=builder /root/StarCraftII /root/StarCraftII
22 | COPY --from=builder /root/.cache /root/.cache
23 | COPY setup.py /root/src/.
24 | COPY requirements.txt /root/src/.
25 | COPY README.md /root/src/.
26 | RUN pip3 install pysc2 && pip3 install -e .[all] && rm -rf /root/.cache
27 | COPY . /root/src
28 |
--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/.nojekyll
--------------------------------------------------------------------------------
/docs/_images/ac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/ac.png
--------------------------------------------------------------------------------
/docs/_images/acer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/acer.png
--------------------------------------------------------------------------------
/docs/_images/act.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/act.png
--------------------------------------------------------------------------------
/docs/_images/algorithms.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/algorithms.png
--------------------------------------------------------------------------------
/docs/_images/attention_discretization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/attention_discretization.png
--------------------------------------------------------------------------------
/docs/_images/bollinger_bands.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/bollinger_bands.png
--------------------------------------------------------------------------------
/docs/_images/box_discretization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/box_discretization.png
--------------------------------------------------------------------------------
/docs/_images/box_masking.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/box_masking.png
--------------------------------------------------------------------------------
/docs/_images/bs_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/bs_dqn.png
--------------------------------------------------------------------------------
/docs/_images/cil.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/cil.png
--------------------------------------------------------------------------------
/docs/_images/compare_by_num_episodes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/compare_by_num_episodes.png
--------------------------------------------------------------------------------
/docs/_images/compare_by_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/compare_by_time.png
--------------------------------------------------------------------------------
/docs/_images/ddpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/ddpg.png
--------------------------------------------------------------------------------
/docs/_images/design.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/design.png
--------------------------------------------------------------------------------
/docs/_images/dfp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/dfp.png
--------------------------------------------------------------------------------
/docs/_images/distributed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/distributed.png
--------------------------------------------------------------------------------
/docs/_images/distributional_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/distributional_dqn.png
--------------------------------------------------------------------------------
/docs/_images/dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/dqn.png
--------------------------------------------------------------------------------
/docs/_images/dueling_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/dueling_dqn.png
--------------------------------------------------------------------------------
/docs/_images/filters.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/filters.png
--------------------------------------------------------------------------------
/docs/_images/full_discrete_action_space_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/full_discrete_action_space_map.png
--------------------------------------------------------------------------------
/docs/_images/horizontal-scale-out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/horizontal-scale-out.png
--------------------------------------------------------------------------------
/docs/_images/improve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/improve.png
--------------------------------------------------------------------------------
/docs/_images/linear_box_to_box_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/linear_box_to_box_map.png
--------------------------------------------------------------------------------
/docs/_images/naf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/naf.png
--------------------------------------------------------------------------------
/docs/_images/nec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/nec.png
--------------------------------------------------------------------------------
/docs/_images/network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/network.png
--------------------------------------------------------------------------------
/docs/_images/observe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/observe.png
--------------------------------------------------------------------------------
/docs/_images/partial_discrete_action_space_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/partial_discrete_action_space_map.png
--------------------------------------------------------------------------------
/docs/_images/pg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/pg.png
--------------------------------------------------------------------------------
/docs/_images/ppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/ppo.png
--------------------------------------------------------------------------------
/docs/_images/qr_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/qr_dqn.png
--------------------------------------------------------------------------------
/docs/_images/rainbow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/rainbow.png
--------------------------------------------------------------------------------
/docs/_images/sac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/sac.png
--------------------------------------------------------------------------------
/docs/_images/separate_signals.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/separate_signals.png
--------------------------------------------------------------------------------
/docs/_images/td3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/td3.png
--------------------------------------------------------------------------------
/docs/_images/train.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/train.png
--------------------------------------------------------------------------------
/docs/_images/updating_dynamically.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/updating_dynamically.gif
--------------------------------------------------------------------------------
/docs/_images/wolpertinger.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_images/wolpertinger.png
--------------------------------------------------------------------------------
/docs/_sources/components/additional_parameters.rst.txt:
--------------------------------------------------------------------------------
1 | Additional Parameters
2 | =====================
3 |
4 | VisualizationParameters
5 | -----------------------
6 | .. autoclass:: rl_coach.base_parameters.VisualizationParameters
7 |
8 | PresetValidationParameters
9 | --------------------------
10 | .. autoclass:: rl_coach.base_parameters.PresetValidationParameters
11 |
12 | TaskParameters
13 | --------------
14 | .. autoclass:: rl_coach.base_parameters.TaskParameters
15 |
16 | DistributedTaskParameters
17 | -------------------------
18 | .. autoclass:: rl_coach.base_parameters.DistributedTaskParameters
19 |
--------------------------------------------------------------------------------
/docs/_sources/components/agents/imitation/bc.rst.txt:
--------------------------------------------------------------------------------
1 | Behavioral Cloning
2 | ==================
3 |
4 | **Actions space:** Discrete | Continuous
5 |
6 | Network Structure
7 | -----------------
8 |
9 | .. image:: /_static/img/design_imgs/pg.png
10 | :align: center
11 |
12 |
13 | Algorithm Description
14 | ---------------------
15 |
16 | Training the network
17 | ++++++++++++++++++++
18 |
19 | The replay buffer contains the expert demonstrations for the task.
20 | These demonstrations are given as (state, action) tuples, with no reward.
21 | The training goal is to reduce the difference between the actions predicted by the network and the actions taken by
22 | the expert for each state.
23 |
24 | 1. Sample a batch of transitions from the replay buffer.
25 | 2. Use the current states as input to the network, and the expert actions as the targets of the network.
26 | 3. For the network head, we use the policy head, which uses the cross entropy loss function.
27 |
28 |
29 | .. autoclass:: rl_coach.agents.bc_agent.BCAlgorithmParameters
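To make the cross-entropy objective in step 3 concrete, here is a minimal NumPy sketch (a generic illustration, not Coach's implementation) of the loss computed over a sampled batch of expert (state, action) pairs:

```python
import numpy as np

def bc_cross_entropy_loss(logits, expert_actions):
    """Cross-entropy between the policy head's softmax output and the expert's
    actions for a sampled batch (illustrative only)."""
    # softmax over the discrete action dimension
    z = logits - logits.max(axis=1, keepdims=True)
    probs = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)
    # negative log-likelihood of each expert action
    rows = np.arange(len(expert_actions))
    return -np.log(probs[rows, expert_actions] + 1e-12).mean()

# a batch of 4 transitions with 3 discrete actions
logits = np.random.randn(4, 3)           # network output for the current states
expert_actions = np.array([0, 2, 1, 2])  # actions taken by the expert
print(bc_cross_entropy_loss(logits, expert_actions))
```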
--------------------------------------------------------------------------------
/docs/_sources/components/agents/index.rst.txt:
--------------------------------------------------------------------------------
1 | Agents
2 | ======
3 |
4 | Coach supports many state-of-the-art reinforcement learning algorithms, which are separated into three main classes -
5 | value optimization, policy optimization and imitation learning.
6 | A detailed description of those algorithms can be found by navigating to each of the algorithm pages.
7 |
8 | .. image:: /_static/img/algorithms.png
9 | :width: 600px
10 | :align: center
11 |
12 | .. toctree::
13 | :maxdepth: 1
14 | :caption: Agents
15 |
16 | policy_optimization/ac
17 | policy_optimization/acer
18 | imitation/bc
19 | value_optimization/bs_dqn
20 | value_optimization/categorical_dqn
21 | imitation/cil
22 | policy_optimization/cppo
23 | policy_optimization/ddpg
24 | other/dfp
25 | value_optimization/double_dqn
26 | value_optimization/dqn
27 | value_optimization/dueling_dqn
28 | value_optimization/mmc
29 | value_optimization/n_step
30 | value_optimization/naf
31 | value_optimization/nec
32 | value_optimization/pal
33 | policy_optimization/pg
34 | policy_optimization/ppo
35 | value_optimization/rainbow
36 | value_optimization/qr_dqn
37 | policy_optimization/sac
38 | policy_optimization/td3
39 | policy_optimization/wolpertinger
40 |
41 |
42 |
43 | .. autoclass:: rl_coach.base_parameters.AgentParameters
44 |
45 | .. autoclass:: rl_coach.agents.agent.Agent
46 | :members:
47 | :inherited-members:
48 |
49 |
--------------------------------------------------------------------------------
/docs/_sources/components/agents/policy_optimization/ac.rst.txt:
--------------------------------------------------------------------------------
1 | Actor-Critic
2 | ============
3 |
4 | **Actions space:** Discrete | Continuous
5 |
6 | **References:** `Asynchronous Methods for Deep Reinforcement Learning <https://arxiv.org/abs/1602.01783>`_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/ac.png
12 | :width: 500px
13 | :align: center
14 |
15 | Algorithm Description
16 | ---------------------
17 |
18 | Choosing an action - Discrete actions
19 | +++++++++++++++++++++++++++++++++++++
20 |
21 | The policy network is used to predict action probabilities. During training, an action is sampled from a categorical
22 | distribution parameterized by these probabilities. During testing, the action with the highest probability is used.
23 |
24 | Training the network
25 | ++++++++++++++++++++
26 | A batch of :math:`T_{max}` transitions is used, and the advantages are calculated upon it.
27 |
28 | Advantages can be calculated by either of the following methods (configured by the selected preset) -
29 |
30 | 1. **A_VALUE** - Estimating advantage directly:
31 | :math:`A(s_t, a_t) = \underbrace{\sum_{i=t}^{i=t + k - 1} \gamma^{i-t}r_i +\gamma^{k} V(s_{t+k})}_{Q(s_t, a_t)} - V(s_t)`
32 | where :math:`k` is :math:`T_{max} - State\_Index` for each state in the batch.
33 |
34 | 2. **GAE** - By following the `Generalized Advantage Estimation <https://arxiv.org/abs/1506.02438>`_ paper.
35 |
36 | The advantages are then used in order to accumulate gradients according to
37 | :math:`L = -\mathop{\mathbb{E}} [log (\pi) \cdot A]`
38 |
39 |
40 | .. autoclass:: rl_coach.agents.actor_critic_agent.ActorCriticAlgorithmParameters
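As an illustration of the **A_VALUE** estimator above, the following NumPy sketch computes the per-state advantages for a batch of transitions (a generic sketch, not Coach code; `bootstrap_value` stands for the value of the state that follows the last transition in the batch):

```python
import numpy as np

def a_value_advantages(rewards, values, bootstrap_value, gamma=0.99):
    """Advantages A(s_t, a_t) = k-step discounted return - V(s_t), where k runs
    to the end of the batch and the return is bootstrapped with V(s_{t+k})."""
    advantages = np.zeros(len(rewards))
    ret = bootstrap_value
    for t in reversed(range(len(rewards))):
        ret = rewards[t] + gamma * ret      # accumulate the k-step return
        advantages[t] = ret - values[t]     # subtract the baseline V(s_t)
    return advantages

# 3 transitions: rewards, V(s_t) predictions, and V of the state after the batch
print(a_value_advantages(np.array([1.0, 0.0, 1.0]),
                         np.array([0.5, 0.4, 0.3]), bootstrap_value=0.2))
```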
--------------------------------------------------------------------------------
/docs/_sources/components/agents/policy_optimization/hac.rst.txt:
--------------------------------------------------------------------------------
1 | Hierarchical Actor Critic
2 | =========================
3 |
4 | **Actions space:** Continuous
5 |
6 | **References:** `Hierarchical Reinforcement Learning with Hindsight `_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/ddpg.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 | Choosing an action
17 | ++++++++++++++++++
18 |
19 | Pass the current states through the actor network, and get an action mean vector :math:`\mu`.
20 | While in the training phase, use a continuous exploration policy, such as the Ornstein-Uhlenbeck process,
21 | to add exploration noise to the action. When testing, use the mean vector :math:`\mu` as-is.
22 |
23 | Training the network
24 | ++++++++++++++++++++
25 |
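A minimal sketch of the exploration step described above, using an Ornstein-Uhlenbeck process to perturb the actor's action mean (the parameter values here are illustrative assumptions, not Coach defaults):

```python
import numpy as np

class OrnsteinUhlenbeckNoise:
    """Temporally correlated noise added to the action mean during training."""
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu, self.theta, self.sigma, self.dt = mu, theta, sigma, dt
        self.state = np.full(action_dim, mu)

    def sample(self):
        dx = (self.theta * (self.mu - self.state) * self.dt
              + self.sigma * np.sqrt(self.dt) * np.random.randn(len(self.state)))
        self.state = self.state + dx
        return self.state

noise = OrnsteinUhlenbeckNoise(action_dim=2)
action_mean = np.array([0.1, -0.3])                # mu from the actor network
exploratory_action = action_mean + noise.sample()  # training; use mu as-is at test time
```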
--------------------------------------------------------------------------------
/docs/_sources/components/agents/value_optimization/categorical_dqn.rst.txt:
--------------------------------------------------------------------------------
1 | Categorical DQN
2 | ===============
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `A Distributional Perspective on Reinforcement Learning <https://arxiv.org/abs/1707.06887>`_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/distributional_dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 |
17 | Training the network
18 | ++++++++++++++++++++
19 |
20 | 1. Sample a batch of transitions from the replay buffer.
21 |
22 | 2. The Bellman update is projected to the set of atoms representing the :math:`Q` values distribution, such
23 | that the :math:`i-th` component of the projected update is calculated as follows:
24 |
25 | :math:`(\Phi \hat{T} Z_{\theta}(s_t,a_t))_i=\sum_{j=0}^{N-1}\Big[1-\frac{\lvert[\hat{T}_{z_{j}}]^{V_{MAX}}_{V_{MIN}}-z_i\rvert}{\Delta z}\Big]^1_0 \ p_j(s_{t+1}, \pi(s_{t+1}))`
26 |
27 | where:
28 | * :math:`[ \cdot ]` bounds its argument in the range :math:`[a, b]`
29 | * :math:`\hat{T}_{z_{j}}` is the Bellman update for atom :math:`z_j`: :math:`\hat{T}_{z_{j}} := r+\gamma z_j`
30 |
31 |
32 | 3. Network is trained with the cross entropy loss between the resulting probability distribution and the target
33 | probability distribution. Only the target of the actions that were actually taken is updated.
34 |
35 | 4. Once in every few thousand steps, weights are copied from the online network to the target network.
36 |
37 |
38 |
39 | .. autoclass:: rl_coach.agents.categorical_dqn_agent.CategoricalDQNAlgorithmParameters
40 |
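The projection step above can be hard to parse from the equation alone; the following NumPy sketch (illustrative, not Coach's implementation) distributes each Bellman-updated atom onto its two neighboring support atoms. The result is the target distribution used in the cross-entropy loss for the played actions.

```python
import numpy as np

def project_distribution(next_probs, rewards, gamma, v_min, v_max, n_atoms):
    """Project the Bellman-updated atoms r + gamma * z_j back onto the fixed
    support {z_i}, splitting each atom's probability between its neighbors."""
    z = np.linspace(v_min, v_max, n_atoms)                   # support atoms z_i
    delta_z = z[1] - z[0]
    batch = next_probs.shape[0]
    rows = np.arange(batch)
    projected = np.zeros((batch, n_atoms))
    for j in range(n_atoms):
        tz = np.clip(rewards + gamma * z[j], v_min, v_max)   # Bellman update of z_j
        b = (tz - v_min) / delta_z                           # continuous index on the support
        l, u = np.floor(b).astype(int), np.ceil(b).astype(int)
        same = (l == u)                                      # updated atom falls exactly on z_l
        projected[rows, l] += next_probs[:, j] * np.where(same, 1.0, u - b)
        projected[rows, u] += next_probs[:, j] * np.where(same, 0.0, b - l)
    return projected
```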
--------------------------------------------------------------------------------
/docs/_sources/components/agents/value_optimization/double_dqn.rst.txt:
--------------------------------------------------------------------------------
1 | Double DQN
2 | ==========
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Deep Reinforcement Learning with Double Q-learning <https://arxiv.org/abs/1509.06461>`_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 |
17 | Training the network
18 | ++++++++++++++++++++
19 |
20 | 1. Sample a batch of transitions from the replay buffer.
21 |
22 | 2. Using the next states from the sampled batch, run the online network in order to find the :math:`Q` maximizing
23 | action :math:`argmax_a Q(s_{t+1},a)`. For these actions, use the corresponding next states and run the target
24 | network to calculate :math:`Q(s_{t+1},argmax_a Q(s_{t+1},a))`.
25 |
26 | 3. In order to zero out the updates for the actions that were not played (resulting from zeroing the MSE loss),
27 | use the current states from the sampled batch, and run the online network to get the current Q values predictions.
28 | Set those values as the targets for the actions that were not actually played.
29 |
30 | 4. For each action that was played, use the following equation for calculating the targets of the network:
31 | :math:`y_t=r(s_t,a_t )+\gamma \cdot Q(s_{t+1},argmax_a Q(s_{t+1},a))`
32 |
33 | 5. Finally, train the online network using the current states as inputs, and with the aforementioned targets.
34 |
35 | 6. Once in every few thousand steps, copy the weights from the online network to the target network.
36 |
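A minimal NumPy sketch of the target computation in steps 2 and 4 (a generic illustration, not Coach code; terminal-state handling is omitted):

```python
import numpy as np

def double_dqn_targets(rewards, next_q_online, next_q_target, gamma=0.99):
    """y_t = r + gamma * Q_target(s_{t+1}, argmax_a Q_online(s_{t+1}, a))."""
    best_actions = next_q_online.argmax(axis=1)        # action selection: online network
    rows = np.arange(len(rewards))
    evaluated = next_q_target[rows, best_actions]      # action evaluation: target network
    return rewards + gamma * evaluated
```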
--------------------------------------------------------------------------------
/docs/_sources/components/agents/value_optimization/dqn.rst.txt:
--------------------------------------------------------------------------------
1 | Deep Q Networks
2 | ===============
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Playing Atari with Deep Reinforcement Learning <https://arxiv.org/abs/1312.5602>`_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 |
17 | Training the network
18 | ++++++++++++++++++++
19 |
20 | 1. Sample a batch of transitions from the replay buffer.
21 |
22 | 2. Using the next states from the sampled batch, run the target network to calculate the :math:`Q` values for each of
23 | the actions :math:`Q(s_{t+1},a)`, and keep only the maximum value for each state.
24 |
25 | 3. In order to zero out the updates for the actions that were not played (resulting from zeroing the MSE loss),
26 | use the current states from the sampled batch, and run the online network to get the current Q values predictions.
27 | Set those values as the targets for the actions that were not actually played.
28 |
29 | 4. For each action that was played, use the following equation for calculating the targets of the network:
30 |    :math:`y_t=r(s_t,a_t )+\gamma \cdot max_a Q(s_{t+1},a)`
31 |
32 | 5. Finally, train the online network using the current states as inputs, and with the aforementioned targets.
33 |
34 | 6. Once in every few thousand steps, copy the weights from the online network to the target network.
35 |
36 |
37 | .. autoclass:: rl_coach.agents.dqn_agent.DQNAlgorithmParameters
38 |
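The target construction in steps 2-4 can be summarized with a short NumPy sketch (illustrative only; terminal states are ignored here):

```python
import numpy as np

def dqn_batch_targets(q_online_current, q_target_next, rewards, actions, gamma=0.99):
    """Build the per-action target matrix: copy the online predictions so that
    non-played actions contribute zero loss, then overwrite the played actions
    with y_t = r + gamma * max_a Q_target(s_{t+1}, a)."""
    targets = q_online_current.copy()                  # step 3: unchanged => zero error
    bootstrap = q_target_next.max(axis=1)              # step 2: max over next-state Q values
    rows = np.arange(len(actions))
    targets[rows, actions] = rewards + gamma * bootstrap   # step 4: played actions only
    return targets
```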
--------------------------------------------------------------------------------
/docs/_sources/components/agents/value_optimization/dueling_dqn.rst.txt:
--------------------------------------------------------------------------------
1 | Dueling DQN
2 | ===========
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Dueling Network Architectures for Deep Reinforcement Learning <https://arxiv.org/abs/1511.06581>`_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/dueling_dqn.png
12 | :align: center
13 |
14 | General Description
15 | -------------------
16 | Dueling DQN presents a change in the network structure compared to DQN.
17 |
18 | Dueling DQN uses a specialized *Dueling Q Head* in order to separate :math:`Q` to an :math:`A` (advantage)
19 | stream and a :math:`V` stream. Adding this type of structure to the network head allows the network to better differentiate
20 | actions from one another, and significantly improves the learning.
21 |
22 | In many states, the values of the different actions are very similar, and it is less important which action to take.
23 | This is especially important in environments where there are many actions to choose from. In DQN, on each training
24 | iteration, for each of the states in the batch, we update the :math:`Q` values only for the specific actions taken in
25 | those states. This results in slower learning, as we do not learn the :math:`Q` values for actions that were not taken yet.
26 | With the dueling architecture, on the other hand, learning is faster, as we start learning the state value even if only a
27 | single action has been taken in this state.
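For illustration, one common way to combine the two streams in the dueling head (subtracting the mean advantage keeps the V/A decomposition identifiable; this is a generic sketch following the Dueling DQN paper, not Coach's head implementation):

```python
import numpy as np

def dueling_q_values(state_value, advantages):
    """Q(s, a) = V(s) + A(s, a) - mean_a A(s, a), per state in the batch."""
    return state_value[:, None] + advantages - advantages.mean(axis=1, keepdims=True)

v = np.array([1.0, 0.5])                                 # V stream output
a = np.array([[0.2, -0.1, 0.3], [0.0, 0.4, -0.4]])       # A stream output per action
print(dueling_q_values(v, a))
```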
--------------------------------------------------------------------------------
/docs/_sources/components/agents/value_optimization/mmc.rst.txt:
--------------------------------------------------------------------------------
1 | Mixed Monte Carlo
2 | =================
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Count-Based Exploration with Neural Density Models <https://arxiv.org/abs/1703.01310>`_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 | Training the network
17 | ++++++++++++++++++++
18 |
19 | In MMC, targets are calculated as a mixture between Double DQN targets and full Monte Carlo samples (total discounted returns).
20 |
21 | The DDQN targets are calculated in the same manner as in the DDQN agent:
22 |
23 | :math:`y_t^{DDQN}=r(s_t,a_t )+\gamma Q(s_{t+1},argmax_a Q(s_{t+1},a))`
24 |
25 | The Monte Carlo targets are calculated by summing up the discounted rewards across the entire episode:
26 |
27 | :math:`y_t^{MC}=\sum_{j=0}^T\gamma^j r(s_{t+j},a_{t+j} )`
28 |
29 | A mixing ratio :math:`\alpha` is then used to get the final targets:
30 |
31 | :math:`y_t=(1-\alpha)\cdot y_t^{DDQN}+\alpha \cdot y_t^{MC}`
32 |
33 | Finally, the online network is trained using the current states as inputs, and the calculated targets.
34 | Once in every few thousand steps, copy the weights from the online network to the target network.
35 |
36 |
37 | .. autoclass:: rl_coach.agents.mmc_agent.MixedMonteCarloAlgorithmParameters
38 |
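The two target types and their mixture can be written directly as follows (an illustrative sketch; the mixing ratio value here is only an example, not a Coach default):

```python
import numpy as np

def monte_carlo_returns(rewards, gamma=0.99):
    """Total discounted return from each step until the end of the episode."""
    returns, acc = np.zeros(len(rewards)), 0.0
    for t in reversed(range(len(rewards))):
        acc = rewards[t] + gamma * acc
        returns[t] = acc
    return returns

def mixed_targets(ddqn_targets, mc_returns, alpha=0.1):
    """y_t = (1 - alpha) * y_t^DDQN + alpha * y_t^MC."""
    return (1.0 - alpha) * ddqn_targets + alpha * mc_returns

episode_rewards = np.array([0.0, 1.0, 0.0, 1.0])
print(mixed_targets(np.array([0.9, 1.1, 0.8, 1.0]), monte_carlo_returns(episode_rewards)))
```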
--------------------------------------------------------------------------------
/docs/_sources/components/agents/value_optimization/n_step.rst.txt:
--------------------------------------------------------------------------------
1 | N-Step Q Learning
2 | =================
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Asynchronous Methods for Deep Reinforcement Learning <https://arxiv.org/abs/1602.01783>`_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 |
17 | Training the network
18 | ++++++++++++++++++++
19 |
20 | The :math:`N`-step Q learning algorithm works in a similar manner to DQN, except for the following changes:
21 |
22 | 1. No replay buffer is used. Instead of sampling random batches of transitions, the network is trained every
23 | :math:`N` steps using the latest :math:`N` steps played by the agent.
24 |
25 | 2. In order to stabilize the learning, multiple workers work together to update the network.
26 |    This has an effect similar to decorrelating the samples used for training.
27 |
28 | 3. Instead of using single-step Q targets for the network, the rewards from :math:`N` consecutive steps are accumulated
29 | to form the :math:`N`-step Q targets, according to the following equation:
30 | :math:`R(s_t, a_t) = \sum_{i=t}^{i=t + k - 1} \gamma^{i-t}r_i +\gamma^{k} V(s_{t+k})`
31 | where :math:`k` is :math:`T_{max} - State\_Index` for each state in the batch
32 |
33 |
34 |
35 | .. autoclass:: rl_coach.agents.n_step_q_agent.NStepQAlgorithmParameters
36 |
--------------------------------------------------------------------------------
/docs/_sources/components/agents/value_optimization/naf.rst.txt:
--------------------------------------------------------------------------------
1 | Normalized Advantage Functions
2 | ==============================
3 |
4 | **Actions space:** Continuous
5 |
6 | **References:** `Continuous Deep Q-Learning with Model-based Acceleration <https://arxiv.org/abs/1603.00748>`_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/naf.png
12 | :width: 600px
13 | :align: center
14 |
15 | Algorithm Description
16 | ---------------------
17 | Choosing an action
18 | ++++++++++++++++++
19 | The current state is used as an input to the network. The action mean :math:`\mu(s_t )` is extracted from the output head.
20 | It is then passed to the exploration policy which adds noise in order to encourage exploration.
21 |
22 | Training the network
23 | ++++++++++++++++++++
24 | The network is trained by using the following targets:
25 | :math:`y_t=r(s_t,a_t )+\gamma\cdot V(s_{t+1})`
26 | Use the next states as the inputs to the target network and extract the :math:`V` value, from within the head,
27 | to get :math:`V(s_{t+1} )`. Then, update the online network using the current states and actions as inputs,
28 | and :math:`y_t` as the targets.
29 | After every training step, use a soft update in order to copy the weights from the online network to the target network.
30 |
31 |
32 |
33 | .. autoclass:: rl_coach.agents.naf_agent.NAFAlgorithmParameters
34 |
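A short sketch of the target and soft-update rules described above (illustrative; the tau value is an example, not necessarily the Coach default):

```python
import numpy as np

def naf_targets(rewards, next_state_values, gamma=0.99):
    """y_t = r(s_t, a_t) + gamma * V(s_{t+1}), with V taken from the target network's head."""
    return rewards + gamma * next_state_values

def soft_update(target_weights, online_weights, tau=0.001):
    """Move each target-network weight a small step towards the online network."""
    return [(1.0 - tau) * t + tau * o for t, o in zip(target_weights, online_weights)]
```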
--------------------------------------------------------------------------------
/docs/_sources/components/agents/value_optimization/qr_dqn.rst.txt:
--------------------------------------------------------------------------------
1 | Quantile Regression DQN
2 | =======================
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Distributional Reinforcement Learning with Quantile Regression <https://arxiv.org/abs/1710.10044>`_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/qr_dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 |
17 | Training the network
18 | ++++++++++++++++++++
19 |
20 | 1. Sample a batch of transitions from the replay buffer.
21 |
22 | 2. First, the next state quantiles are predicted. These are used in order to calculate the targets for the network,
23 | by following the Bellman equation.
24 | Next, the quantile locations for the current states are predicted, sorted, and used for calculating the
25 | quantile midpoint targets.
26 |
27 | 3. The network is trained with the quantile regression loss between the resulting quantile locations and the target
28 | quantile locations. Only the targets of the actions that were actually taken are updated.
29 |
30 | 4. Once in every few thousand steps, weights are copied from the online network to the target network.
31 |
32 |
33 | .. autoclass:: rl_coach.agents.qr_dqn_agent.QuantileRegressionDQNAlgorithmParameters
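The quantile regression (pinball) loss used in step 3 can be sketched as follows (a generic NumPy illustration without the Huber smoothing, not Coach's implementation):

```python
import numpy as np

def quantile_regression_loss(pred_quantiles, target_quantiles):
    """Pinball loss between predicted quantile locations theta_i and the Bellman
    target quantiles, weighted by the midpoints tau_hat_i = (2i - 1) / 2N."""
    n = pred_quantiles.shape[-1]
    tau_hat = (2 * np.arange(n) + 1) / (2.0 * n)
    # pairwise TD errors u_ij = target_j - prediction_i, shape (batch, i, j)
    u = target_quantiles[:, None, :] - pred_quantiles[:, :, None]
    return (np.abs(tau_hat[None, :, None] - (u < 0)) * np.abs(u)).mean()
```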
--------------------------------------------------------------------------------
/docs/_sources/components/architectures/index.rst.txt:
--------------------------------------------------------------------------------
1 | Architectures
2 | =============
3 |
4 | Architectures contain all the classes that implement the neural-network-related components for the agent.
5 | Since Coach is intended to work with multiple neural network frameworks, each framework implements its
6 | own components under a dedicated directory. For example, the tensorflow directory contains all the neural network
7 | parts that are implemented using TensorFlow.
8 |
9 | .. autoclass:: rl_coach.base_parameters.NetworkParameters
10 |
11 | Architecture
12 | ------------
13 | .. autoclass:: rl_coach.architectures.architecture.Architecture
14 | :members:
15 | :inherited-members:
16 |
17 | NetworkWrapper
18 | --------------
19 |
20 | .. image:: /_static/img/distributed.png
21 | :width: 600px
22 | :align: center
23 |
24 | .. autoclass:: rl_coach.architectures.network_wrapper.NetworkWrapper
25 | :members:
26 | :inherited-members:
27 |
28 |
--------------------------------------------------------------------------------
/docs/_sources/components/core_types.rst.txt:
--------------------------------------------------------------------------------
1 | Core Types
2 | ==========
3 |
4 | ActionInfo
5 | ----------
6 | .. autoclass:: rl_coach.core_types.ActionInfo
7 | :members:
8 | :inherited-members:
9 |
10 | Batch
11 | -----
12 | .. autoclass:: rl_coach.core_types.Batch
13 | :members:
14 | :inherited-members:
15 |
16 | EnvResponse
17 | -----------
18 | .. autoclass:: rl_coach.core_types.EnvResponse
19 | :members:
20 | :inherited-members:
21 |
22 | Episode
23 | -------
24 | .. autoclass:: rl_coach.core_types.Episode
25 | :members:
26 | :inherited-members:
27 |
28 | Transition
29 | ----------
30 | .. autoclass:: rl_coach.core_types.Transition
31 | :members:
32 | :inherited-members:
33 |
34 |
--------------------------------------------------------------------------------
/docs/_sources/components/data_stores/index.rst.txt:
--------------------------------------------------------------------------------
1 | Data Stores
2 | ===========
3 |
4 | S3DataStore
5 | -----------
6 | .. autoclass:: rl_coach.data_stores.s3_data_store.S3DataStore
7 |
8 | NFSDataStore
9 | ------------
10 | .. autoclass:: rl_coach.data_stores.nfs_data_store.NFSDataStore
11 |
--------------------------------------------------------------------------------
/docs/_sources/components/filters/index.rst.txt:
--------------------------------------------------------------------------------
1 | Filters
2 | =======
3 |
4 | .. toctree::
5 | :maxdepth: 1
6 | :caption: Filters
7 |
8 | input_filters
9 | output_filters
10 |
11 | Filters are a mechanism in Coach that allows doing pre-processing and post-processing of the internal agent information.
12 | There are two filter categories -
13 |
14 | * **Input filters** - these are filters that process the information passed **into** the agent from the environment.
15 | This information includes the observation and the reward. Input filters therefore allow rescaling observations,
16 |   normalizing rewards, stacking observations, etc.
17 |
18 | * **Output filters** - these are filters that process the information going **out** of the agent into the environment.
19 |   This information includes the action the agent chooses to take. Output filters therefore allow converting
20 |   actions from one space into another. For example, the agent can take :math:`N` discrete actions, which the
21 |   output filter then maps onto :math:`N` continuous actions.
22 |
23 | Filters can be stacked on top of each other in order to build complex processing flows of the inputs or outputs.
24 |
25 | .. image:: /_static/img/filters.png
26 | :width: 350px
27 | :align: center
28 |
29 |
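To make the stacking idea concrete, here is a framework-agnostic sketch (not the Coach filter API) of an input pipeline that applies a chain of observation and reward filters before the agent sees them:

```python
import numpy as np

def rescale_observation(obs):
    """Example observation filter: scale raw pixel values to [0, 1]."""
    return obs / 255.0

def clip_reward(reward):
    """Example reward filter: clip the reward to [-1, 1]."""
    return float(np.clip(reward, -1.0, 1.0))

def apply_input_filters(obs, reward, obs_filters, reward_filters):
    """Apply each filter in order, mimicking a stacked input-filter pipeline."""
    for f in obs_filters:
        obs = f(obs)
    for f in reward_filters:
        reward = f(reward)
    return obs, reward

obs, reward = apply_input_filters(np.full((84, 84), 128.0), 3.0,
                                  [rescale_observation], [clip_reward])
```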
--------------------------------------------------------------------------------
/docs/_sources/components/filters/output_filters.rst.txt:
--------------------------------------------------------------------------------
1 | Output Filters
2 | --------------
3 |
4 | The output filters only process the actions.
5 |
6 | Action Filters
7 | ++++++++++++++
8 |
9 | .. autoclass:: rl_coach.filters.action.AttentionDiscretization
10 |
11 | .. image:: /_static/img/attention_discretization.png
12 | :align: center
13 |
14 | .. autoclass:: rl_coach.filters.action.BoxDiscretization
15 |
16 | .. image:: /_static/img/box_discretization.png
17 | :align: center
18 |
19 | .. autoclass:: rl_coach.filters.action.BoxMasking
20 |
21 | .. image:: /_static/img/box_masking.png
22 | :align: center
23 |
24 | .. autoclass:: rl_coach.filters.action.PartialDiscreteActionSpaceMap
25 |
26 | .. image:: /_static/img/partial_discrete_action_space_map.png
27 | :align: center
28 |
29 | .. autoclass:: rl_coach.filters.action.FullDiscreteActionSpaceMap
30 |
31 | .. image:: /_static/img/full_discrete_action_space_map.png
32 | :align: center
33 |
34 | .. autoclass:: rl_coach.filters.action.LinearBoxToBoxMap
35 |
36 | .. image:: /_static/img/linear_box_to_box_map.png
37 | :align: center
--------------------------------------------------------------------------------
/docs/_sources/components/memories/index.rst.txt:
--------------------------------------------------------------------------------
1 | Memories
2 | ========
3 |
4 | Episodic Memories
5 | -----------------
6 |
7 | EpisodicExperienceReplay
8 | ++++++++++++++++++++++++
9 | .. autoclass:: rl_coach.memories.episodic.EpisodicExperienceReplay
10 |
11 | EpisodicHindsightExperienceReplay
12 | +++++++++++++++++++++++++++++++++
13 | .. autoclass:: rl_coach.memories.episodic.EpisodicHindsightExperienceReplay
14 |
15 | EpisodicHRLHindsightExperienceReplay
16 | ++++++++++++++++++++++++++++++++++++
17 | .. autoclass:: rl_coach.memories.episodic.EpisodicHRLHindsightExperienceReplay
18 |
19 | SingleEpisodeBuffer
20 | +++++++++++++++++++
21 | .. autoclass:: rl_coach.memories.episodic.SingleEpisodeBuffer
22 |
23 |
24 | Non-Episodic Memories
25 | ---------------------
26 | BalancedExperienceReplay
27 | ++++++++++++++++++++++++
28 | .. autoclass:: rl_coach.memories.non_episodic.BalancedExperienceReplay
29 |
30 | QDND
31 | ++++
32 | .. autoclass:: rl_coach.memories.non_episodic.QDND
33 |
34 | ExperienceReplay
35 | ++++++++++++++++
36 | .. autoclass:: rl_coach.memories.non_episodic.ExperienceReplay
37 |
38 | PrioritizedExperienceReplay
39 | +++++++++++++++++++++++++++
40 | .. autoclass:: rl_coach.memories.non_episodic.PrioritizedExperienceReplay
41 |
42 | TransitionCollection
43 | ++++++++++++++++++++
44 | .. autoclass:: rl_coach.memories.non_episodic.TransitionCollection
45 |
--------------------------------------------------------------------------------
/docs/_sources/components/memory_backends/index.rst.txt:
--------------------------------------------------------------------------------
1 | Memory Backends
2 | ===============
3 |
4 | RedisPubSubBackend
5 | ------------------
6 | .. autoclass:: rl_coach.memories.backend.redis.RedisPubSubBackend
7 |
--------------------------------------------------------------------------------
/docs/_sources/components/orchestrators/index.rst.txt:
--------------------------------------------------------------------------------
1 | Orchestrators
2 | =============
3 |
4 |
5 | Kubernetes
6 | ----------
7 | .. autoclass:: rl_coach.orchestrators.kubernetes_orchestrator.Kubernetes
8 |
--------------------------------------------------------------------------------
/docs/_sources/components/spaces.rst.txt:
--------------------------------------------------------------------------------
1 | Spaces
2 | ======
3 |
4 | Space
5 | -----
6 | .. autoclass:: rl_coach.spaces.Space
7 | :members:
8 | :inherited-members:
9 |
10 |
11 |
12 | Observation Spaces
13 | ------------------
14 | .. autoclass:: rl_coach.spaces.ObservationSpace
15 | :members:
16 | :inherited-members:
17 |
18 | VectorObservationSpace
19 | ++++++++++++++++++++++
20 | .. autoclass:: rl_coach.spaces.VectorObservationSpace
21 |
22 | PlanarMapsObservationSpace
23 | ++++++++++++++++++++++++++
24 | .. autoclass:: rl_coach.spaces.PlanarMapsObservationSpace
25 |
26 | ImageObservationSpace
27 | +++++++++++++++++++++
28 | .. autoclass:: rl_coach.spaces.ImageObservationSpace
29 |
30 |
31 |
32 | Action Spaces
33 | -------------
34 | .. autoclass:: rl_coach.spaces.ActionSpace
35 | :members:
36 | :inherited-members:
37 |
38 | AttentionActionSpace
39 | ++++++++++++++++++++
40 | .. autoclass:: rl_coach.spaces.AttentionActionSpace
41 |
42 | BoxActionSpace
43 | ++++++++++++++
44 | .. autoclass:: rl_coach.spaces.BoxActionSpace
45 |
46 | DiscreteActionSpace
47 | ++++++++++++++++++++
48 | .. autoclass:: rl_coach.spaces.DiscreteActionSpace
49 |
50 | MultiSelectActionSpace
51 | ++++++++++++++++++++++
52 | .. autoclass:: rl_coach.spaces.MultiSelectActionSpace
53 |
54 | CompoundActionSpace
55 | +++++++++++++++++++
56 | .. autoclass:: rl_coach.spaces.CompoundActionSpace
57 |
58 |
59 |
60 | Goal Spaces
61 | -----------
62 | .. autoclass:: rl_coach.spaces.GoalsSpace
63 | :members:
64 | :inherited-members:
65 |
--------------------------------------------------------------------------------
/docs/_sources/features/algorithms.rst.txt:
--------------------------------------------------------------------------------
1 | Algorithms
2 | ==========
3 |
4 | Coach supports many state-of-the-art reinforcement learning algorithms, which are separated into three main classes -
5 | value optimization, policy optimization and imitation learning.
6 | A detailed description of those algorithms may be found in the `agents <../components/agents/index.html>`_ section.
7 |
8 | .. image:: /_static/img/algorithms.png
9 | :width: 600px
10 | :align: center
--------------------------------------------------------------------------------
/docs/_sources/features/batch_rl.rst.txt:
--------------------------------------------------------------------------------
1 | Batch Reinforcement Learning
2 | ============================
3 |
4 | Coach supports Batch Reinforcement Learning, where learning is based solely on a (fixed) batch of data.
5 | In Batch RL, we are given a dataset of experience, which was collected using some (one or more) deployed policies, and we would
6 | like to use it to learn a better policy than what was used to collect the dataset.
7 | There is no simulator to interact with, and so we cannot collect any new data, meaning we often cannot explore the MDP any further.
8 | To make things even harder, we would also like to use the dataset in order to evaluate the newly learned policy
9 | (using off-policy evaluation), since we do not have a simulator on which to evaluate the policy.
10 | Batch RL is also often beneficial in cases where we just want to separate the inference (data collection) from the
11 | training process of a new policy. This is often the case when we have a system on which we can quite easily deploy a policy
12 | and collect experience data, but cannot easily use that system's setup to train a new policy online (as is often the
13 | case with more standard RL algorithms).
14 |
15 | Coach supports (almost) all of the integrated off-policy algorithms with Batch RL.
16 |
17 | A lot more details and example usage can be found in the
18 | `tutorial `_.
--------------------------------------------------------------------------------
/docs/_sources/features/benchmarks.rst.txt:
--------------------------------------------------------------------------------
1 | Benchmarks
2 | ==========
3 |
4 | Reinforcement learning is a developing field, and so far it has been particularly difficult to reproduce some of the
5 | results published in the original papers. Some reasons for this are:
6 |
7 | * Reinforcement learning algorithms are notorious for having an unstable learning process.
8 |   The data the neural network trains on is dynamic, and depends on the random seed defined for the environment.
9 |
10 | * Reinforcement learning algorithms have many moving parts. For some environments and agents, many
11 |   "tricks" are needed to reproduce the exact behavior the paper authors observed. Also, there are **a lot** of
12 | hyper-parameters to set.
13 |
14 | In order for a reinforcement learning implementation to be useful for research or for data science, it must be
15 | shown that it achieves the expected behavior. For this reason, we collected a set of benchmark results from most
16 | of the algorithms implemented in Coach. The algorithms were tested on a subset of the same environments that were
17 | used in the original papers, and with multiple seeds for each environment.
18 | Additionally, Coach uses some strict testing mechanisms to try and make sure the results we show for these
19 | benchmarks stay intact as Coach continues to develop.
20 |
21 | To see the benchmark results, please visit the
22 | `following GitHub page `_.
--------------------------------------------------------------------------------
/docs/_sources/features/index.rst.txt:
--------------------------------------------------------------------------------
1 | Features
2 | ========
3 |
4 | .. toctree::
5 | :maxdepth: 1
6 | :caption: Features
7 |
8 | algorithms
9 | environments
10 | benchmarks
11 | batch_rl
--------------------------------------------------------------------------------
/docs/_sources/test.rst.txt:
--------------------------------------------------------------------------------
1 | test
2 | ----
3 |
4 | .. important:: It's a note! in markdown!
5 |
6 | .. autoclass:: rl_coach.agents.dqn_agent.DQNAgent
7 | :members:
8 | :inherited-members:
--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | /* Docs background */
2 | .wy-side-nav-search{
3 | background-color: #043c74;
4 | }
5 |
6 | /* Mobile version */
7 | .wy-nav-top{
8 | background-color: #043c74;
9 | }
10 |
11 |
12 | .green {
13 | color: green;
14 | }
15 |
16 | .red {
17 | color: red;
18 | }
19 |
20 | .blue {
21 | color: blue;
22 | }
23 |
24 | .yellow {
25 | color: yellow;
26 | }
27 |
28 | .badge {
29 | border: 2px;
30 | border-style: solid;
31 | border-color: #6C8EBF;
32 | border-radius: 5px;
33 | padding: 3px 15px 3px 15px;
34 | margin: 5px;
35 | display: inline-block;
36 | font-weight: bold;
37 | font-size: 16px;
38 | background: #DAE8FC;
39 | }
40 |
41 | .badge:hover {
42 | cursor: pointer;
43 | }
44 |
45 | .badge > a {
46 | color: black;
47 | }
48 |
49 | .bordered-container {
50 | border: 0px;
51 | border-style: solid;
52 | border-radius: 8px;
53 | padding: 15px;
54 | margin-bottom: 20px;
55 | background: #f2f2f2;
56 | }
57 |
58 | .questionnaire {
59 | font-size: 1.2em;
60 | line-height: 1.5em;
61 | }
--------------------------------------------------------------------------------
/docs/_static/dark_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/dark_logo.png
--------------------------------------------------------------------------------
/docs/_static/documentation_options.js:
--------------------------------------------------------------------------------
1 | var DOCUMENTATION_OPTIONS = {
2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
3 | VERSION: '0.12.0',
4 | LANGUAGE: 'None',
5 | COLLAPSE_INDEX: false,
6 | FILE_SUFFIX: '.html',
7 | HAS_SOURCE: true,
8 | SOURCELINK_SUFFIX: '.txt',
9 | NAVIGATION_WITH_KEYS: false
10 | };
--------------------------------------------------------------------------------
/docs/_static/file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/file.png
--------------------------------------------------------------------------------
/docs/_static/fonts/Inconsolata-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Inconsolata-Bold.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/Inconsolata-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Inconsolata-Regular.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/Inconsolata.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Inconsolata.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato-Bold.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato-Regular.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-bold.eot
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-bold.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-bold.woff
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-bold.woff2
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bolditalic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-bolditalic.eot
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bolditalic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-bolditalic.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bolditalic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-bolditalic.woff
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bolditalic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-bolditalic.woff2
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-italic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-italic.eot
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-italic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-italic.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-italic.woff
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-italic.woff2
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-regular.eot
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-regular.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-regular.woff
--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/Lato/lato-regular.woff2
--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/RobotoSlab-Bold.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/RobotoSlab-Regular.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot
--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff
--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2
--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot
--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff
--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2
--------------------------------------------------------------------------------
/docs/_static/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/fontawesome-webfont.eot
--------------------------------------------------------------------------------
/docs/_static/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/docs/_static/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/fontawesome-webfont.woff
--------------------------------------------------------------------------------
/docs/_static/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/fonts/fontawesome-webfont.woff2
--------------------------------------------------------------------------------
/docs/_static/minus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/minus.png
--------------------------------------------------------------------------------
/docs/_static/plus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/_static/plus.png
--------------------------------------------------------------------------------
/docs/objects.inv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs/objects.inv
--------------------------------------------------------------------------------
/docs_raw/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = source
8 | BUILDDIR = build
9 |
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 |
14 | .PHONY: help Makefile
15 |
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs_raw/README.md:
--------------------------------------------------------------------------------
1 | # Coach Documentation
2 |
3 | Coach uses Sphinx with a Read The Docs theme for its documentation website.
4 | The website is hosted on GitHub Pages, and is served directly from the built docs directory in the repository.
5 |
6 | To build automatically, first go to the 'docs_raw' directory. The script below installs all required packages,
7 | builds the html, and copies all new docs into 'coach/docs/'.
8 |
9 | Run the following command (make sure it is executable):
10 | ```
11 | ./build_docs.sh
12 | ```
13 |
14 | To build the documentation website manually on your local machine, first install the following requirements:
15 |
16 | ```
17 | pip install Sphinx
18 | pip install recommonmark
19 | pip install sphinx_rtd_theme
20 | pip install sphinx-autobuild
21 | pip install sphinx-argparse
22 | ```
23 |
24 | Then there are two options to build:
25 | 1. Build using the make file (recommended). Run from within the `docs_raw` directory:
26 |
27 | ```
28 | make html
29 | cp source/_static/css/custom.css build/html/_static/css/
30 | rm -rf ../docs/
31 | mkdir ../docs
32 | touch ../docs/.nojekyll
33 | cp -R build/html/* ../docs/
34 | ```
35 |
36 | 2. Build automatically after every change while editing the files:
37 |
38 | ```
39 | sphinx-autobuild source build/html
40 | ```
41 |
--------------------------------------------------------------------------------
/docs_raw/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/__init__.py
--------------------------------------------------------------------------------
/docs_raw/build_docs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo "installing requirements..."
4 |
5 | pip3 install Sphinx
6 | pip3 install recommonmark
7 | pip3 install sphinx_rtd_theme
8 | pip3 install sphinx-autobuild
9 | pip3 install sphinx-argparse
10 |
11 | echo "Making docs..."
12 |
13 | make html
14 |
15 | echo "Copying new docs into coach/docs/"
16 |
17 | cp source/_static/css/custom.css build/html/_static/css/
18 | rm -rf ../docs/
19 | mkdir ../docs
20 | touch ../docs/.nojekyll
21 | cp -R build/html/* ../docs/
22 | rm -r build
23 |
24 | echo "Finished!"
--------------------------------------------------------------------------------
/docs_raw/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs_raw/source/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/__init__.py
--------------------------------------------------------------------------------
/docs_raw/source/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | /* Docs background */
2 | .wy-side-nav-search{
3 | background-color: #043c74;
4 | }
5 |
6 | /* Mobile version */
7 | .wy-nav-top{
8 | background-color: #043c74;
9 | }
10 |
11 |
12 | .green {
13 | color: green;
14 | }
15 |
16 | .red {
17 | color: red;
18 | }
19 |
20 | .blue {
21 | color: blue;
22 | }
23 |
24 | .yellow {
25 | color: yellow;
26 | }
27 |
28 | .badge {
29 | border: 2px;
30 | border-style: solid;
31 | border-color: #6C8EBF;
32 | border-radius: 5px;
33 | padding: 3px 15px 3px 15px;
34 | margin: 5px;
35 | display: inline-block;
36 | font-weight: bold;
37 | font-size: 16px;
38 | background: #DAE8FC;
39 | }
40 |
41 | .badge:hover {
42 | cursor: pointer;
43 | }
44 |
45 | .badge > a {
46 | color: black;
47 | }
48 |
49 | .bordered-container {
50 | border: 0px;
51 | border-style: solid;
52 | border-radius: 8px;
53 | padding: 15px;
54 | margin-bottom: 20px;
55 | background: #f2f2f2;
56 | }
57 |
58 | .questionnaire {
59 | font-size: 1.2em;
60 | line-height: 1.5em;
61 | }
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/act.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/act.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/algorithms.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/algorithms.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/attention_discretization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/attention_discretization.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/bollinger_bands.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/bollinger_bands.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/box_discretization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/box_discretization.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/box_masking.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/box_masking.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/compare_by_num_episodes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/compare_by_num_episodes.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/compare_by_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/compare_by_time.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/dark_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/dark_logo.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/ac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/ac.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/acer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/acer.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/bs_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/bs_dqn.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/cil.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/cil.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/ddpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/ddpg.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/dfp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/dfp.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/distributional_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/distributional_dqn.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/dqn.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/dueling_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/dueling_dqn.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/naf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/naf.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/nec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/nec.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/pg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/pg.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/ppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/ppo.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/qr_dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/qr_dqn.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/rainbow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/rainbow.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/sac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/sac.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/td3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/td3.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/design_imgs/wolpertinger.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/design_imgs/wolpertinger.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/distributed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/distributed.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/filters.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/filters.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/full_discrete_action_space_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/full_discrete_action_space_map.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/graph.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/horizontal-scale-out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/horizontal-scale-out.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/improve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/improve.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/level.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/level.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/linear_box_to_box_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/linear_box_to_box_map.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/network.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/observe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/observe.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/partial_discrete_action_space_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/partial_discrete_action_space_map.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/separate_signals.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/separate_signals.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/train.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/train.png
--------------------------------------------------------------------------------
/docs_raw/source/_static/img/updating_dynamically.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/docs_raw/source/_static/img/updating_dynamically.gif
--------------------------------------------------------------------------------
/docs_raw/source/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 | {% block extrahead %}
3 |
4 | {% endblock %}
--------------------------------------------------------------------------------
/docs_raw/source/components/additional_parameters.rst:
--------------------------------------------------------------------------------
1 | Additional Parameters
2 | =====================
3 |
4 | VisualizationParameters
5 | -----------------------
6 | .. autoclass:: rl_coach.base_parameters.VisualizationParameters
7 |
8 | PresetValidationParameters
9 | --------------------------
10 | .. autoclass:: rl_coach.base_parameters.PresetValidationParameters
11 |
12 | TaskParameters
13 | --------------
14 | .. autoclass:: rl_coach.base_parameters.TaskParameters
15 |
16 | DistributedTaskParameters
17 | -------------------------
18 | .. autoclass:: rl_coach.base_parameters.DistributedTaskParameters
19 |
--------------------------------------------------------------------------------
/docs_raw/source/components/agents/imitation/bc.rst:
--------------------------------------------------------------------------------
1 | Behavioral Cloning
2 | ==================
3 |
4 | **Actions space:** Discrete | Continuous
5 |
6 | Network Structure
7 | -----------------
8 |
9 | .. image:: /_static/img/design_imgs/pg.png
10 | :align: center
11 |
12 |
13 | Algorithm Description
14 | ---------------------
15 |
16 | Training the network
17 | ++++++++++++++++++++
18 |
19 | The replay buffer contains the expert demonstrations for the task.
20 | These demonstrations are given as (state, action) tuples, with no reward.
21 | The training goal is to reduce the difference between the actions predicted by the network and the actions taken by
22 | the expert for each state.
23 |
24 | 1. Sample a batch of transitions from the replay buffer.
25 | 2. Use the current states as input to the network, and the expert actions as the targets of the network.
26 | 3. For the network head, we use the policy head, which uses the cross entropy loss function.
27 |
28 |
29 | .. autoclass:: rl_coach.agents.bc_agent.BCAlgorithmParameters
--------------------------------------------------------------------------------
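The behavioral cloning training step described in bc.rst above is plain supervised learning on the expert's (state, action) pairs. Below is a minimal, self-contained sketch of one such step for a toy linear policy trained with a cross-entropy loss; it is an illustration only, not Coach's implementation, and all names in it are assumptions.

```
import numpy as np

def softmax(logits):
    z = logits - logits.max(axis=1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

def bc_loss_and_grad(W, states, expert_actions):
    """Cross-entropy between the policy distribution pi(a|s) = softmax(s @ W)
    and the expert actions, plus its gradient w.r.t. W."""
    logits = states @ W
    probs = softmax(logits)
    n = states.shape[0]
    loss = -np.log(probs[np.arange(n), expert_actions] + 1e-8).mean()
    dlogits = probs.copy()
    dlogits[np.arange(n), expert_actions] -= 1.0   # d(cross-entropy)/d(logits)
    grad_W = states.T @ dlogits / n
    return loss, grad_W

# one training step on a batch sampled from the expert demonstrations
rng = np.random.default_rng(0)
states = rng.normal(size=(32, 4))             # batch of states
expert_actions = rng.integers(0, 3, size=32)  # expert-chosen discrete actions
W = np.zeros((4, 3))
loss, grad = bc_loss_and_grad(W, states, expert_actions)
W -= 0.1 * grad                               # gradient descent step
```
--------------------------------------------------------------------------------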
/docs_raw/source/components/agents/index.rst:
--------------------------------------------------------------------------------
1 | Agents
2 | ======
3 |
4 | Coach supports many state-of-the-art reinforcement learning algorithms, which are separated into three main classes -
5 | value optimization, policy optimization and imitation learning.
6 | A detailed description of those algorithms can be found by navigating to each of the algorithm pages.
7 |
8 | .. image:: /_static/img/algorithms.png
9 | :width: 600px
10 | :align: center
11 |
12 | .. toctree::
13 | :maxdepth: 1
14 | :caption: Agents
15 |
16 | policy_optimization/ac
17 | policy_optimization/acer
18 | imitation/bc
19 | value_optimization/bs_dqn
20 | value_optimization/categorical_dqn
21 | imitation/cil
22 | policy_optimization/cppo
23 | policy_optimization/ddpg
24 | other/dfp
25 | value_optimization/double_dqn
26 | value_optimization/dqn
27 | value_optimization/dueling_dqn
28 | value_optimization/mmc
29 | value_optimization/n_step
30 | value_optimization/naf
31 | value_optimization/nec
32 | value_optimization/pal
33 | policy_optimization/pg
34 | policy_optimization/ppo
35 | value_optimization/rainbow
36 | value_optimization/qr_dqn
37 | policy_optimization/sac
38 | policy_optimization/td3
39 | policy_optimization/wolpertinger
40 |
41 |
42 |
43 | .. autoclass:: rl_coach.base_parameters.AgentParameters
44 |
45 | .. autoclass:: rl_coach.agents.agent.Agent
46 | :members:
47 | :inherited-members:
48 |
49 |
--------------------------------------------------------------------------------
/docs_raw/source/components/agents/policy_optimization/ac.rst:
--------------------------------------------------------------------------------
1 | Actor-Critic
2 | ============
3 |
4 | **Actions space:** Discrete | Continuous
5 |
6 | **References:** `Asynchronous Methods for Deep Reinforcement Learning `_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/ac.png
12 | :width: 500px
13 | :align: center
14 |
15 | Algorithm Description
16 | ---------------------
17 |
18 | Choosing an action - Discrete actions
19 | +++++++++++++++++++++++++++++++++++++
20 |
21 | The policy network is used in order to predict action probabilities. During training, an action is sampled from the categorical
22 | distribution defined by these probabilities. During testing, the action with the highest probability is used.
23 |
24 | Training the network
25 | ++++++++++++++++++++
26 | A batch of :math:`T_{max}` transitions is used, and the advantages are calculated upon it.
27 |
28 | Advantages can be calculated by either of the following methods (configured by the selected preset) -
29 |
30 | 1. **A_VALUE** - Estimating advantage directly:
31 | :math:`A(s_t, a_t) = \underbrace{\sum_{i=t}^{i=t + k - 1} \gamma^{i-t}r_i +\gamma^{k} V(s_{t+k})}_{Q(s_t, a_t)} - V(s_t)`
32 | where :math:`k` is :math:`T_{max} - State\_Index` for each state in the batch.
33 |
34 | 2. **GAE** - By following the `Generalized Advantage Estimation `_ paper.
35 |
36 | The advantages are then used in order to accumulate gradients according to
37 | :math:`L = -\mathop{\mathbb{E}} [log (\pi) \cdot A]`
38 |
39 |
40 | .. autoclass:: rl_coach.agents.actor_critic_agent.ActorCriticAlgorithmParameters
--------------------------------------------------------------------------------
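As a concrete illustration of the A_VALUE option described in ac.rst above, the sketch below computes the advantages for a T_max-step batch by accumulating discounted rewards backwards and bootstrapping from the value of the state that follows the batch. It is a standalone example under those assumptions, not Coach's implementation.

```
import numpy as np

def a_value_advantages(rewards, values, bootstrap_value, gamma=0.99):
    """A(s_t, a_t) = sum_{i=t}^{t+k-1} gamma^{i-t} r_i + gamma^k V(s_{t+k}) - V(s_t),
    where the sum runs to the end of the T_max-step batch and bootstrap_value is
    V(s_{t+k}) for the state following the batch."""
    T = len(rewards)
    returns = np.zeros(T)
    running = bootstrap_value
    for t in reversed(range(T)):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns - np.asarray(values, dtype=float)

# a 5-step batch
advantages = a_value_advantages(rewards=[1.0, 0.0, 0.0, 1.0, 0.0],
                                values=[0.5, 0.4, 0.6, 0.3, 0.2],
                                bootstrap_value=0.7)
```
--------------------------------------------------------------------------------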
/docs_raw/source/components/agents/policy_optimization/hac.rst:
--------------------------------------------------------------------------------
1 | Hierarchical Actor Critic
2 | =========================
3 |
4 | **Actions space:** Continuous
5 |
6 | **References:** `Hierarchical Reinforcement Learning with Hindsight `_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/ddpg.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 | Choosing an action
17 | ++++++++++++++++++
18 |
19 | Pass the current states through the actor network, and get an action mean vector :math:`\mu`.
20 | During the training phase, use a continuous exploration policy, such as the Ornstein-Uhlenbeck process,
21 | to add exploration noise to the action. When testing, use the mean vector :math:`\mu` as-is.
22 |
23 | Training the network
24 | ++++++++++++++++++++
25 |
--------------------------------------------------------------------------------
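The continuous exploration policy mentioned in hac.rst above (an Ornstein-Uhlenbeck process) can be sketched as follows. The parameter values are illustrative defaults, not necessarily the ones Coach uses.

```
import numpy as np

class OrnsteinUhlenbeckNoise:
    """Temporally correlated noise added to the actor's mean action during training."""
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2, dt=1.0):
        self.mu, self.theta, self.sigma, self.dt = mu, theta, sigma, dt
        self.state = np.full(action_dim, mu, dtype=float)

    def sample(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        dx = (self.theta * (self.mu - self.state) * self.dt
              + self.sigma * np.sqrt(self.dt) * np.random.randn(*self.state.shape))
        self.state = self.state + dx
        return self.state

noise = OrnsteinUhlenbeckNoise(action_dim=2)
mu_action = np.array([0.1, -0.3])                 # actor output for the current state
exploratory_action = mu_action + noise.sample()   # used only during training
```
--------------------------------------------------------------------------------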
/docs_raw/source/components/agents/value_optimization/categorical_dqn.rst:
--------------------------------------------------------------------------------
1 | Categorical DQN
2 | ===============
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `A Distributional Perspective on Reinforcement Learning `_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/distributional_dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 |
17 | Training the network
18 | ++++++++++++++++++++
19 |
20 | 1. Sample a batch of transitions from the replay buffer.
21 |
22 | 2. The Bellman update is projected to the set of atoms representing the :math:`Q` values distribution, such
23 | that the :math:`i`-th component of the projected update is calculated as follows:
24 |
25 | :math:`(\Phi \hat{T} Z_{\theta}(s_t,a_t))_i=\sum_{j=0}^{N-1}\Big[1-\frac{\lvert[\hat{T}_{z_{j}}]^{V_{MAX}}_{V_{MIN}}-z_i\rvert}{\Delta z}\Big]^1_0 \ p_j(s_{t+1}, \pi(s_{t+1}))`
26 |
27 | where:
28 | * :math:`[ \cdot ]` bounds its argument in the range :math:`[a, b]`
29 | * :math:`\hat{T}_{z_{j}}` is the Bellman update for atom :math:`z_j`: :math:`\hat{T}_{z_{j}} := r+\gamma z_j`
30 |
31 |
32 | 3. Network is trained with the cross entropy loss between the resulting probability distribution and the target
33 | probability distribution. Only the target of the actions that were actually taken is updated.
34 |
35 | 4. Once in every few thousand steps, weights are copied from the online network to the target network.
36 |
37 |
38 |
39 | .. autoclass:: rl_coach.agents.categorical_dqn_agent.CategoricalDQNAlgorithmParameters
40 |
--------------------------------------------------------------------------------
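The projection in step 2 of categorical_dqn.rst above distributes each Bellman-updated atom's probability mass onto its two neighbouring atoms of the fixed support. The sketch below is an unoptimized, illustrative implementation under assumed values for the support range and atom count; terminal transitions are masked with a `dones` flag, a standard addition not spelled out in the text. It is not Coach's code.

```
import numpy as np

def project_bellman_update(rewards, dones, next_probs, gamma=0.99,
                           v_min=-10.0, v_max=10.0, n_atoms=51):
    """Project the Bellman-updated atoms T_z_j = r + gamma * z_j back onto the fixed
    support {z_i}, distributing each atom's probability mass p_j to its two
    neighbouring support atoms (batch version, written for clarity, not speed)."""
    z = np.linspace(v_min, v_max, n_atoms)
    delta_z = (v_max - v_min) / (n_atoms - 1)
    batch_size = rewards.shape[0]
    target = np.zeros((batch_size, n_atoms))
    for b in range(batch_size):
        for j in range(n_atoms):
            # terminal transitions (dones == 1) keep only the reward
            tz = rewards[b] + (1.0 - dones[b]) * gamma * z[j]
            tz = np.clip(tz, v_min, v_max)          # bound to [V_MIN, V_MAX]
            pos = (tz - v_min) / delta_z            # fractional index of tz on the support
            lower, upper = int(np.floor(pos)), int(np.ceil(pos))
            if lower == upper:                      # tz falls exactly on an atom
                target[b, lower] += next_probs[b, j]
            else:
                target[b, lower] += next_probs[b, j] * (upper - pos)
                target[b, upper] += next_probs[b, j] * (pos - lower)
    return target

# next_probs holds p_j(s_{t+1}, pi(s_{t+1})) for a batch of 2 transitions
target_dist = project_bellman_update(rewards=np.array([1.0, 0.0]),
                                     dones=np.array([0.0, 1.0]),
                                     next_probs=np.full((2, 51), 1.0 / 51))
```
--------------------------------------------------------------------------------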
/docs_raw/source/components/agents/value_optimization/double_dqn.rst:
--------------------------------------------------------------------------------
1 | Double DQN
2 | ==========
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Deep Reinforcement Learning with Double Q-learning `_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 |
17 | Training the network
18 | ++++++++++++++++++++
19 |
20 | 1. Sample a batch of transitions from the replay buffer.
21 |
22 | 2. Using the next states from the sampled batch, run the online network in order to find the :math:`Q` maximizing
23 | action :math:`argmax_a Q(s_{t+1},a)`. For these actions, use the corresponding next states and run the target
24 | network to calculate :math:`Q(s_{t+1},argmax_a Q(s_{t+1},a))`.
25 |
26 | 3. In order to zero out the updates for the actions that were not played (resulting from zeroing the MSE loss),
27 | use the current states from the sampled batch, and run the online network to get the current Q values predictions.
28 | Set those values as the targets for the actions that were not actually played.
29 |
30 | 4. For each action that was played, use the following equation for calculating the targets of the network:
31 | :math:`y_t=r(s_t,a_t )+\gamma \cdot Q(s_{t+1},argmax_a Q(s_{t+1},a))`
32 |
33 | 5. Finally, train the online network using the current states as inputs, and with the aforementioned targets.
34 |
35 | 6. Once in every few thousand steps, copy the weights from the online network to the target network.
36 |
--------------------------------------------------------------------------------
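Steps 2 and 4 of double_dqn.rst above boil down to selecting the greedy action with the online network and evaluating it with the target network. A minimal sketch follows; masking terminal states with `dones` is a standard addition not spelled out in the text, and this is an illustration rather than Coach's implementation.

```
import numpy as np

def double_dqn_targets(rewards, dones, next_q_online, next_q_target, gamma=0.99):
    """y_t = r(s_t, a_t) + gamma * Q_target(s_{t+1}, argmax_a Q_online(s_{t+1}, a))."""
    best_actions = np.argmax(next_q_online, axis=1)   # argmax taken with the online network
    next_values = next_q_target[np.arange(len(rewards)), best_actions]
    return rewards + gamma * (1.0 - dones) * next_values

# batch of 3 transitions with 2 possible actions
rewards = np.array([1.0, 0.0, 0.5])
dones = np.array([0.0, 0.0, 1.0])
next_q_online = np.array([[0.2, 0.9], [0.4, 0.1], [0.3, 0.3]])
next_q_target = np.array([[0.1, 0.8], [0.5, 0.2], [0.3, 0.4]])
targets = double_dqn_targets(rewards, dones, next_q_online, next_q_target)
```
--------------------------------------------------------------------------------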
/docs_raw/source/components/agents/value_optimization/dqn.rst:
--------------------------------------------------------------------------------
1 | Deep Q Networks
2 | ===============
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Playing Atari with Deep Reinforcement Learning `_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 |
17 | Training the network
18 | ++++++++++++++++++++
19 |
20 | 1. Sample a batch of transitions from the replay buffer.
21 |
22 | 2. Using the next states from the sampled batch, run the target network to calculate the :math:`Q` values for each of
23 | the actions :math:`Q(s_{t+1},a)`, and keep only the maximum value for each state.
24 |
25 | 3. In order to zero out the updates for the actions that were not played (resulting from zeroing the MSE loss),
26 | use the current states from the sampled batch, and run the online network to get the current Q values predictions.
27 | Set those values as the targets for the actions that were not actually played.
28 |
29 | 4. For each action that was played, use the following equation for calculating the targets of the network:
30 | :math:`y_t=r(s_t,a_t )+\gamma \cdot max_a Q(s_{t+1},a)`
31 |
32 | 5. Finally, train the online network using the current states as inputs, and with the aforementioned targets.
33 |
34 | 6. Once in every few thousand steps, copy the weights from the online network to the target network.
35 |
36 |
37 | .. autoclass:: rl_coach.agents.dqn_agent.DQNAlgorithmParameters
38 |
--------------------------------------------------------------------------------
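A compact sketch of steps 2-4 of dqn.rst above: start the targets from the online network's own predictions so that non-played actions contribute no error, then overwrite the played action's entry with the bootstrapped target. Illustrative only, not Coach's implementation.

```
import numpy as np

def dqn_training_targets(current_q_online, actions, rewards, next_q_target, gamma=0.99):
    """Steps 2-4: keep the online predictions for non-played actions (zero error),
    and set the played action's target to r(s_t, a_t) + gamma * max_a Q_target(s_{t+1}, a).
    (Terminal-state handling is omitted for brevity.)"""
    targets = current_q_online.copy()                     # step 3
    max_next = next_q_target.max(axis=1)                  # step 2
    targets[np.arange(len(actions)), actions] = rewards + gamma * max_next   # step 4
    return targets

# batch of 2 transitions with 3 possible actions
targets = dqn_training_targets(current_q_online=np.array([[0.1, 0.2, 0.3], [0.0, 0.5, 0.4]]),
                               actions=np.array([2, 1]),
                               rewards=np.array([1.0, 0.0]),
                               next_q_target=np.array([[0.3, 0.1, 0.2], [0.6, 0.2, 0.1]]))
```
--------------------------------------------------------------------------------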
/docs_raw/source/components/agents/value_optimization/dueling_dqn.rst:
--------------------------------------------------------------------------------
1 | Dueling DQN
2 | ===========
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Dueling Network Architectures for Deep Reinforcement Learning `_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/dueling_dqn.png
12 | :align: center
13 |
14 | General Description
15 | -------------------
16 | Dueling DQN presents a change in the network structure compared to DQN.
17 |
18 | Dueling DQN uses a specialized *Dueling Q Head* in order to separate :math:`Q` to an :math:`A` (advantage)
19 | stream and a :math:`V` stream. Adding this type of structure to the network head allows the network to better differentiate
20 | actions from one another, and significantly improves the learning.
21 |
22 | In many states, the values of the different actions are very similar, and it is less important which action to take.
23 | This is especially important in environments where there are many actions to choose from. In DQN, on each training
24 | iteration, for each of the states in the batch, we update the :math:`Q` values only for the specific actions taken in
25 | those states. This results in slower learning as we do not learn the :math:`Q` values for actions that were not taken yet.
26 | With the dueling architecture, on the other hand, learning is faster, as we start learning the state value even if only a
27 | single action has been taken in this state.
--------------------------------------------------------------------------------
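The dueling head described in dueling_dqn.rst above combines the two streams back into Q values. One common aggregation, used in the Dueling DQN paper, subtracts the mean advantage so that the V and A streams are identifiable; a minimal sketch of that aggregation (not Coach's code):

```
import numpy as np

def dueling_q_values(state_values, advantages):
    """Combine the V stream and the A stream of the dueling head:
    Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)."""
    return (state_values[:, None]
            + advantages
            - advantages.mean(axis=1, keepdims=True))

v = np.array([1.0, 0.5])                       # V(s) for a batch of 2 states
a = np.array([[0.2, -0.2, 0.0],                # A(s, a) for 3 actions
              [1.0,  0.0, -1.0]])
q = dueling_q_values(v, a)                     # shape (2, 3)
```
--------------------------------------------------------------------------------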
/docs_raw/source/components/agents/value_optimization/mmc.rst:
--------------------------------------------------------------------------------
1 | Mixed Monte Carlo
2 | =================
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Count-Based Exploration with Neural Density Models `_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 | Training the network
17 | ++++++++++++++++++++
18 |
19 | In MMC, targets are calculated as a mixture between Double DQN targets and full Monte Carlo samples (total discounted returns).
20 |
21 | The DDQN targets are calculated in the same manner as in the DDQN agent:
22 |
23 | :math:`y_t^{DDQN}=r(s_t,a_t )+\gamma Q(s_{t+1},argmax_a Q(s_{t+1},a))`
24 |
25 | The Monte Carlo targets are calculated by summing up the discounted rewards across the entire episode:
26 |
27 | :math:`y_t^{MC}=\sum_{j=0}^T\gamma^j r(s_{t+j},a_{t+j} )`
28 |
29 | A mixing ratio :math:`\alpha` is then used to get the final targets:
30 |
31 | :math:`y_t=(1-\alpha)\cdot y_t^{DDQN}+\alpha \cdot y_t^{MC}`
32 |
33 | Finally, the online network is trained using the current states as inputs, and the calculated targets.
34 | Once in every few thousand steps, copy the weights from the online network to the target network.
35 |
36 |
37 | .. autoclass:: rl_coach.agents.mmc_agent.MixedMonteCarloAlgorithmParameters
38 |
--------------------------------------------------------------------------------
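A minimal sketch of the target mixing described in mmc.rst above: compute the Monte Carlo returns over the episode and blend them with the DDQN targets using the mixing ratio alpha. The default of 0.1 below is illustrative, not Coach's setting, and this is not Coach's implementation.

```
import numpy as np

def monte_carlo_returns(rewards, gamma=0.99):
    """y_t^MC: total discounted return from step t to the end of the episode."""
    returns = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

def mixed_monte_carlo_targets(ddqn_targets, mc_targets, alpha=0.1):
    """y_t = (1 - alpha) * y_t^DDQN + alpha * y_t^MC."""
    return (1.0 - alpha) * np.asarray(ddqn_targets) + alpha * np.asarray(mc_targets)

episode_rewards = [0.0, 0.0, 1.0, 0.0, 1.0]
y_mc = monte_carlo_returns(episode_rewards)
y_ddqn = np.array([0.9, 0.95, 1.0, 0.9, 1.0])   # computed as in the DDQN agent
y = mixed_monte_carlo_targets(y_ddqn, y_mc)
```
--------------------------------------------------------------------------------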
/docs_raw/source/components/agents/value_optimization/n_step.rst:
--------------------------------------------------------------------------------
1 | N-Step Q Learning
2 | =================
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Asynchronous Methods for Deep Reinforcement Learning `_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 |
17 | Training the network
18 | ++++++++++++++++++++
19 |
20 | The :math:`N`-step Q learning algorithm works in a similar manner to DQN, except for the following changes:
21 |
22 | 1. No replay buffer is used. Instead of sampling random batches of transitions, the network is trained every
23 | :math:`N` steps using the latest :math:`N` steps played by the agent.
24 |
25 | 2. In order to stabilize the learning, multiple workers work together to update the network.
26 | This has a similar effect to decorrelating the samples used for training.
27 |
28 | 3. Instead of using single-step Q targets for the network, the rewards from :math:`N` consecutive steps are accumulated
29 | to form the :math:`N`-step Q targets, according to the following equation:
30 | :math:`R(s_t, a_t) = \sum_{i=t}^{i=t + k - 1} \gamma^{i-t}r_i +\gamma^{k} V(s_{t+k})`
31 | where :math:`k` is :math:`T_{max} - State\_Index` for each state in the batch
32 |
33 |
34 |
35 | .. autoclass:: rl_coach.agents.n_step_q_agent.NStepQAlgorithmParameters
36 |
--------------------------------------------------------------------------------
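A minimal sketch of the N-step target computation from item 3 of n_step.rst above: accumulate the rewards of the latest N steps backwards and bootstrap from the value of the state following the batch (how that bootstrap value is produced, e.g. by the target network, is left outside the sketch). Not Coach's implementation.

```
import numpy as np

def n_step_q_targets(rewards, bootstrap_value, gamma=0.99):
    """R(s_t, a_t) = sum_{i=t}^{t+k-1} gamma^{i-t} r_i + gamma^k V(s_{t+k}),
    accumulated backwards over the latest N steps played by the agent."""
    targets = np.zeros(len(rewards))
    running = bootstrap_value              # V(s_{t+k}) for the state following the batch
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        targets[t] = running
    return targets

targets = n_step_q_targets(rewards=[0.0, 1.0, 0.0, 0.0], bootstrap_value=0.8)
```
--------------------------------------------------------------------------------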
/docs_raw/source/components/agents/value_optimization/naf.rst:
--------------------------------------------------------------------------------
1 | Normalized Advantage Functions
2 | ==============================
3 |
4 | **Actions space:** Continuous
5 |
6 | **References:** `Continuous Deep Q-Learning with Model-based Acceleration `_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/naf.png
12 | :width: 600px
13 | :align: center
14 |
15 | Algorithm Description
16 | ---------------------
17 | Choosing an action
18 | ++++++++++++++++++
19 | The current state is used as an input to the network. The action mean :math:`\mu(s_t )` is extracted from the output head.
20 | It is then passed to the exploration policy which adds noise in order to encourage exploration.
21 |
22 | Training the network
23 | ++++++++++++++++++++
24 | The network is trained by using the following targets:
25 | :math:`y_t=r(s_t,a_t )+\gamma\cdot V(s_{t+1})`
26 | Use the next states as the inputs to the target network and extract the :math:`V` value, from within the head,
27 | to get :math:`V(s_{t+1} )`. Then, update the online network using the current states and actions as inputs,
28 | and :math:`y_t` as the targets.
29 | After every training step, use a soft update in order to copy the weights from the online network to the target network.
30 |
31 |
32 |
33 | .. autoclass:: rl_coach.agents.naf_agent.NAFAlgorithmParameters
34 |
--------------------------------------------------------------------------------
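Two small sketches of the update described in naf.rst above: the one-step target using the V value taken from the target network's head, and the soft update that moves the target network towards the online network after every training step. The tau value is illustrative, and neither function is Coach's implementation.

```
import numpy as np

def naf_targets(rewards, next_state_values, gamma=0.99):
    """y_t = r(s_t, a_t) + gamma * V(s_{t+1}), with V taken from the target network head."""
    return np.asarray(rewards) + gamma * np.asarray(next_state_values)

def soft_update(target_weights, online_weights, tau=0.001):
    """Move each target-network weight slightly towards its online counterpart:
    w_target <- (1 - tau) * w_target + tau * w_online."""
    return [(1.0 - tau) * wt + tau * wo for wt, wo in zip(target_weights, online_weights)]

y = naf_targets(rewards=[1.0, 0.0], next_state_values=[0.5, 0.7])
new_target = soft_update([np.zeros((2, 2))], [np.ones((2, 2))])
```
--------------------------------------------------------------------------------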
/docs_raw/source/components/agents/value_optimization/qr_dqn.rst:
--------------------------------------------------------------------------------
1 | Quantile Regression DQN
2 | =======================
3 |
4 | **Actions space:** Discrete
5 |
6 | **References:** `Distributional Reinforcement Learning with Quantile Regression `_
7 |
8 | Network Structure
9 | -----------------
10 |
11 | .. image:: /_static/img/design_imgs/qr_dqn.png
12 | :align: center
13 |
14 | Algorithm Description
15 | ---------------------
16 |
17 | Training the network
18 | ++++++++++++++++++++
19 |
20 | 1. Sample a batch of transitions from the replay buffer.
21 |
22 | 2. First, the next state quantiles are predicted. These are used in order to calculate the targets for the network,
23 | by following the Bellman equation.
24 | Next, the current quantile locations for the current states are predicted, sorted, and used for calculating the
25 | quantile midpoint targets.
26 |
27 | 3. The network is trained with the quantile regression loss between the resulting quantile locations and the target
28 | quantile locations. Only the targets of the actions that were actually taken are updated.
29 |
30 | 4. Once in every few thousand steps, weights are copied from the online network to the target network.
31 |
32 |
33 | .. autoclass:: rl_coach.agents.qr_dqn_agent.QuantileRegressionDQNAlgorithmParameters
--------------------------------------------------------------------------------
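The quantile regression loss in step 3 of qr_dqn.rst above can be sketched as follows for a single state-action pair, using the quantile midpoints tau_hat and a Huber threshold kappa=1 (an illustrative, commonly used choice). This is a standalone example, not Coach's implementation.

```
import numpy as np

def quantile_huber_loss(pred_quantiles, target_quantiles, kappa=1.0):
    """Quantile regression (Huber) loss between the predicted and target quantile
    locations of a single state-action pair; tau_hat are the quantile midpoints."""
    n = len(pred_quantiles)
    tau_hat = (np.arange(n) + 0.5) / n
    # pairwise TD errors u[i, j] = target_j - pred_i
    u = target_quantiles[None, :] - pred_quantiles[:, None]
    huber = np.where(np.abs(u) <= kappa,
                     0.5 * u ** 2,
                     kappa * (np.abs(u) - 0.5 * kappa))
    weight = np.abs(tau_hat[:, None] - (u < 0.0).astype(float))
    # sum over predicted quantiles, average over target quantiles
    return (weight * huber / kappa).sum(axis=0).mean()

pred = np.array([0.0, 0.5, 1.0, 1.5])     # sorted current quantile locations
target = np.array([0.2, 0.6, 1.1, 1.4])   # r + gamma * next-state quantiles
loss = quantile_huber_loss(pred, target)
```
--------------------------------------------------------------------------------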
/docs_raw/source/components/architectures/index.rst:
--------------------------------------------------------------------------------
1 | Architectures
2 | =============
3 |
4 | Architectures contain all the classes that implement the neural network components of the agent.
5 | Since Coach is intended to work with multiple neural network frameworks, each framework implements its
6 | own components under a dedicated directory. For example, the tensorflow components directory contains all the neural network
7 | parts that are implemented using TensorFlow.
8 |
9 | .. autoclass:: rl_coach.base_parameters.NetworkParameters
10 |
11 | Architecture
12 | ------------
13 | .. autoclass:: rl_coach.architectures.architecture.Architecture
14 | :members:
15 | :inherited-members:
16 |
17 | NetworkWrapper
18 | --------------
19 |
20 | .. image:: /_static/img/distributed.png
21 | :width: 600px
22 | :align: center
23 |
24 | .. autoclass:: rl_coach.architectures.network_wrapper.NetworkWrapper
25 | :members:
26 | :inherited-members:
27 |
28 |
--------------------------------------------------------------------------------
/docs_raw/source/components/core_types.rst:
--------------------------------------------------------------------------------
1 | Core Types
2 | ==========
3 |
4 | ActionInfo
5 | ----------
6 | .. autoclass:: rl_coach.core_types.ActionInfo
7 | :members:
8 | :inherited-members:
9 |
10 | Batch
11 | -----
12 | .. autoclass:: rl_coach.core_types.Batch
13 | :members:
14 | :inherited-members:
15 |
16 | EnvResponse
17 | -----------
18 | .. autoclass:: rl_coach.core_types.EnvResponse
19 | :members:
20 | :inherited-members:
21 |
22 | Episode
23 | -------
24 | .. autoclass:: rl_coach.core_types.Episode
25 | :members:
26 | :inherited-members:
27 |
28 | Transition
29 | ----------
30 | .. autoclass:: rl_coach.core_types.Transition
31 | :members:
32 | :inherited-members:
33 |
34 |
--------------------------------------------------------------------------------
/docs_raw/source/components/data_stores/index.rst:
--------------------------------------------------------------------------------
1 | Data Stores
2 | ===========
3 |
4 | S3DataStore
5 | -----------
6 | .. autoclass:: rl_coach.data_stores.s3_data_store.S3DataStore
7 |
8 | NFSDataStore
9 | ------------
10 | .. autoclass:: rl_coach.data_stores.nfs_data_store.NFSDataStore
11 |
--------------------------------------------------------------------------------
/docs_raw/source/components/filters/index.rst:
--------------------------------------------------------------------------------
1 | Filters
2 | =======
3 |
4 | .. toctree::
5 | :maxdepth: 1
6 | :caption: Filters
7 |
8 | input_filters
9 | output_filters
10 |
11 | Filters are a mechanism in Coach that allows doing pre-processing and post-processing of the internal agent information.
12 | There are two filter categories -
13 |
14 | * **Input filters** - these are filters that process the information passed **into** the agent from the environment.
15 | This information includes the observation and the reward. Input filters therefore allow rescaling observations,
16 | normalizing rewards, stacking observations, etc.
17 |
18 | * **Output filters** - these are filters that process the information going **out** of the agent into the environment.
19 | This information includes the action the agent chooses to take. Output filters therefore allow conversion of
20 | actions from one space into another. For example, the agent can take :math:`N` discrete actions, which are then mapped by
21 | the output filter onto :math:`N` continuous actions.
22 |
23 | Filters can be stacked on top of each other in order to build complex processing flows of the inputs or outputs.
24 |
25 | .. image:: /_static/img/filters.png
26 | :width: 350px
27 | :align: center
28 |
29 |
--------------------------------------------------------------------------------
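To make the filter-stacking idea from filters/index.rst above concrete, here is a deliberately simplified, framework-free sketch of chaining two input filters. The class names and the `filter` method signature are invented for illustration and do not reflect Coach's actual filter API.

```
import numpy as np

class RewardRescaleFilter:
    """Toy input filter: scale the reward by a constant factor."""
    def __init__(self, scale):
        self.scale = scale
    def filter(self, observation, reward):
        return observation, reward * self.scale

class ObservationStackFilter:
    """Toy input filter: stack the last k observations along a new leading axis."""
    def __init__(self, k):
        self.k = k
        self.buffer = []
    def filter(self, observation, reward):
        self.buffer = (self.buffer + [observation])[-self.k:]
        # pad with the oldest observation until k frames have been seen
        frames = [self.buffer[0]] * (self.k - len(self.buffer)) + self.buffer
        return np.stack(frames), reward

# stack the filters to build a processing pipeline on the way into the agent
input_filters = [RewardRescaleFilter(0.01), ObservationStackFilter(4)]
observation, reward = np.zeros((84, 84)), 5.0
for f in input_filters:
    observation, reward = f.filter(observation, reward)
```
--------------------------------------------------------------------------------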
/docs_raw/source/components/filters/output_filters.rst:
--------------------------------------------------------------------------------
1 | Output Filters
2 | --------------
3 |
4 | The output filters only process the actions.
5 |
6 | Action Filters
7 | ++++++++++++++
8 |
9 | .. autoclass:: rl_coach.filters.action.AttentionDiscretization
10 |
11 | .. image:: /_static/img/attention_discretization.png
12 | :align: center
13 |
14 | .. autoclass:: rl_coach.filters.action.BoxDiscretization
15 |
16 | .. image:: /_static/img/box_discretization.png
17 | :align: center
18 |
19 | .. autoclass:: rl_coach.filters.action.BoxMasking
20 |
21 | .. image:: /_static/img/box_masking.png
22 | :align: center
23 |
24 | .. autoclass:: rl_coach.filters.action.PartialDiscreteActionSpaceMap
25 |
26 | .. image:: /_static/img/partial_discrete_action_space_map.png
27 | :align: center
28 |
29 | .. autoclass:: rl_coach.filters.action.FullDiscreteActionSpaceMap
30 |
31 | .. image:: /_static/img/full_discrete_action_space_map.png
32 | :align: center
33 |
34 | .. autoclass:: rl_coach.filters.action.LinearBoxToBoxMap
35 |
36 | .. image:: /_static/img/linear_box_to_box_map.png
37 | :align: center
--------------------------------------------------------------------------------
/docs_raw/source/components/memories/index.rst:
--------------------------------------------------------------------------------
1 | Memories
2 | ========
3 |
4 | Episodic Memories
5 | -----------------
6 |
7 | EpisodicExperienceReplay
8 | ++++++++++++++++++++++++
9 | .. autoclass:: rl_coach.memories.episodic.EpisodicExperienceReplay
10 |
11 | EpisodicHindsightExperienceReplay
12 | +++++++++++++++++++++++++++++++++
13 | .. autoclass:: rl_coach.memories.episodic.EpisodicHindsightExperienceReplay
14 |
15 | EpisodicHRLHindsightExperienceReplay
16 | ++++++++++++++++++++++++++++++++++++
17 | .. autoclass:: rl_coach.memories.episodic.EpisodicHRLHindsightExperienceReplay
18 |
19 | SingleEpisodeBuffer
20 | +++++++++++++++++++
21 | .. autoclass:: rl_coach.memories.episodic.SingleEpisodeBuffer
22 |
23 |
24 | Non-Episodic Memories
25 | ---------------------
26 | BalancedExperienceReplay
27 | ++++++++++++++++++++++++
28 | .. autoclass:: rl_coach.memories.non_episodic.BalancedExperienceReplay
29 |
30 | QDND
31 | ++++
32 | .. autoclass:: rl_coach.memories.non_episodic.QDND
33 |
34 | ExperienceReplay
35 | ++++++++++++++++
36 | .. autoclass:: rl_coach.memories.non_episodic.ExperienceReplay
37 |
38 | PrioritizedExperienceReplay
39 | +++++++++++++++++++++++++++
40 | .. autoclass:: rl_coach.memories.non_episodic.PrioritizedExperienceReplay
41 |
42 | TransitionCollection
43 | ++++++++++++++++++++
44 | .. autoclass:: rl_coach.memories.non_episodic.TransitionCollection
45 |
--------------------------------------------------------------------------------
/docs_raw/source/components/memory_backends/index.rst:
--------------------------------------------------------------------------------
1 | Memory Backends
2 | ===============
3 |
4 | RedisPubSubBackend
5 | ------------------
6 | .. autoclass:: rl_coach.memories.backend.redis.RedisPubSubBackend
7 |
--------------------------------------------------------------------------------
/docs_raw/source/components/orchestrators/index.rst:
--------------------------------------------------------------------------------
1 | Orchestrators
2 | =============
3 |
4 |
5 | Kubernetes
6 | ----------
7 | .. autoclass:: rl_coach.orchestrators.kubernetes_orchestrator.Kubernetes
8 |
--------------------------------------------------------------------------------
/docs_raw/source/components/spaces.rst:
--------------------------------------------------------------------------------
1 | Spaces
2 | ======
3 |
4 | Space
5 | -----
6 | .. autoclass:: rl_coach.spaces.Space
7 | :members:
8 | :inherited-members:
9 |
10 |
11 |
12 | Observation Spaces
13 | ------------------
14 | .. autoclass:: rl_coach.spaces.ObservationSpace
15 | :members:
16 | :inherited-members:
17 |
18 | VectorObservationSpace
19 | ++++++++++++++++++++++
20 | .. autoclass:: rl_coach.spaces.VectorObservationSpace
21 |
22 | PlanarMapsObservationSpace
23 | ++++++++++++++++++++++++++
24 | .. autoclass:: rl_coach.spaces.PlanarMapsObservationSpace
25 |
26 | ImageObservationSpace
27 | +++++++++++++++++++++
28 | .. autoclass:: rl_coach.spaces.ImageObservationSpace
29 |
30 |
31 |
32 | Action Spaces
33 | -------------
34 | .. autoclass:: rl_coach.spaces.ActionSpace
35 | :members:
36 | :inherited-members:
37 |
38 | AttentionActionSpace
39 | ++++++++++++++++++++
40 | .. autoclass:: rl_coach.spaces.AttentionActionSpace
41 |
42 | BoxActionSpace
43 | ++++++++++++++
44 | .. autoclass:: rl_coach.spaces.BoxActionSpace
45 |
46 | DiscreteActionSpace
47 | ++++++++++++++++++++
48 | .. autoclass:: rl_coach.spaces.DiscreteActionSpace
49 |
50 | MultiSelectActionSpace
51 | ++++++++++++++++++++++
52 | .. autoclass:: rl_coach.spaces.MultiSelectActionSpace
53 |
54 | CompoundActionSpace
55 | +++++++++++++++++++
56 | .. autoclass:: rl_coach.spaces.CompoundActionSpace
57 |
58 |
59 |
60 | Goal Spaces
61 | -----------
62 | .. autoclass:: rl_coach.spaces.GoalsSpace
63 | :members:
64 | :inherited-members:
65 |
--------------------------------------------------------------------------------
/docs_raw/source/features/algorithms.rst:
--------------------------------------------------------------------------------
1 | Algorithms
2 | ==========
3 |
4 | Coach supports many state-of-the-art reinforcement learning algorithms, which are separated into three main classes -
5 | value optimization, policy optimization and imitation learning.
6 | A detailed description of those algorithms may be found in the `agents <../components/agents/index.html>`_ section.
7 |
8 | .. image:: /_static/img/algorithms.png
9 | :width: 600px
10 | :align: center
--------------------------------------------------------------------------------
/docs_raw/source/features/batch_rl.rst:
--------------------------------------------------------------------------------
1 | Batch Reinforcement Learning
2 | ============================
3 |
4 | Coach supports Batch Reinforcement Learning, where learning is based solely on a (fixed) batch of data.
5 | In Batch RL, we are given a dataset of experience that was collected by one or more deployed policies, and we would
6 | like to use it to learn a better policy than the one used to collect the dataset.
7 | There is no simulator to interact with, so we cannot collect any new data, which means we often cannot explore the MDP any further.
8 | To make things even harder, we would also like to use the same dataset to evaluate the newly learned policy
9 | (using off-policy evaluation), since we do not have a simulator on which to evaluate it.
10 | Batch RL is also often beneficial when we simply want to separate inference (data collection) from the
11 | training of a new policy. This is often the case when we have a system on which we can easily deploy a policy
12 | and collect experience data, but cannot easily use that system's setup to train a new policy online (as is typically
13 | done with more standard RL algorithms).
14 |
15 | Coach supports (almost) all of the integrated off-policy algorithms with Batch RL.
16 |
17 | A lot more details and example usage can be found in the
18 | `tutorial `_.
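19 |
20 | To illustrate the setting itself, the following minimal sketch runs fitted Q-iteration over a fixed batch of
21 | transitions from a tiny, made-up MDP. It is a generic illustration of learning from a static dataset, not Coach's
22 | Batch RL API; all names and numbers are purely illustrative.
23 |
24 | .. code-block:: python
25 |
26 |     import numpy as np
27 |
28 |     # A fixed batch of (state, action, reward, next_state, done) transitions,
29 |     # e.g. logged by previously deployed policies. No new data can be collected.
30 |     batch = [
31 |         (0, 0, 0.0, 1, False), (0, 1, 1.0, 2, False),
32 |         (1, 0, 0.0, 0, False), (1, 1, 5.0, 2, True),
33 |         (2, 0, 2.0, 2, True),  (2, 1, 0.0, 0, False),
34 |     ]
35 |     n_states, n_actions, gamma = 3, 2, 0.9
36 |
37 |     # Fitted Q-iteration: repeatedly regress Q towards Bellman targets computed
38 |     # from the fixed batch only - there is no environment interaction.
39 |     q = np.zeros((n_states, n_actions))
40 |     for _ in range(100):
41 |         new_q = q.copy()
42 |         for s, a, r, s_next, done in batch:
43 |             new_q[s, a] = r if done else r + gamma * q[s_next].max()
44 |         q = new_q
45 |
46 |     # The improved policy is greedy with respect to the learned Q values.
47 |     greedy_policy = q.argmax(axis=1)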
--------------------------------------------------------------------------------
/docs_raw/source/features/benchmarks.rst:
--------------------------------------------------------------------------------
1 | Benchmarks
2 | ==========
3 |
4 | Reinforcement learning is a developing field, and so far it has been particularly difficult to reproduce some of the
5 | results published in the original papers. Some reasons for this are:
6 |
7 | * Reinforcement learning algorithms are notorious for having an unstable learning process.
8 |   The data the neural network trains on is dynamic, and depends on the random seed defined for the environment.
9 |
10 | * Reinforcement learning algorithms have many moving parts. For some environments and agents, there are many
11 |   "tricks" which are needed to reproduce the exact behavior the papers' authors observed. Also, there are **a lot** of
12 | hyper-parameters to set.
13 |
14 | In order for a reinforcement learning implementation to be useful for research or for data science, it must be
15 | shown that it achieves the expected behavior. For this reason, we collected a set of benchmark results from most
16 | of the algorithms implemented in Coach. The algorithms were tested on a subset of the same environments that were
17 | used in the original papers, and with multiple seeds for each environment.
18 | Additionally, Coach uses strict testing mechanisms to help ensure that the results we show for these
19 | benchmarks stay intact as Coach continues to develop.
20 |
21 | To see the benchmark results, please visit the
22 | `following GitHub page `_.
--------------------------------------------------------------------------------
/docs_raw/source/features/index.rst:
--------------------------------------------------------------------------------
1 | Features
2 | ========
3 |
4 | .. toctree::
5 | :maxdepth: 1
6 | :caption: Features
7 |
8 | algorithms
9 | environments
10 | benchmarks
11 | batch_rl
--------------------------------------------------------------------------------
/docs_raw/source/test.rst:
--------------------------------------------------------------------------------
1 | test
2 | ----
3 |
4 | .. important:: It's a note! In markdown!
5 |
6 | .. autoclass:: rl_coach.agents.dqn_agent.DQNAgent
7 | :members:
8 | :inherited-members:
--------------------------------------------------------------------------------
/img/ant.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/ant.gif
--------------------------------------------------------------------------------
/img/carla.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/carla.gif
--------------------------------------------------------------------------------
/img/coach_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/coach_logo.png
--------------------------------------------------------------------------------
/img/dashboard.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/dashboard.gif
--------------------------------------------------------------------------------
/img/dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/dashboard.png
--------------------------------------------------------------------------------
/img/doom_deathmatch.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/doom_deathmatch.gif
--------------------------------------------------------------------------------
/img/doom_health.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/doom_health.gif
--------------------------------------------------------------------------------
/img/fetch_slide.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/fetch_slide.gif
--------------------------------------------------------------------------------
/img/minitaur.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/minitaur.gif
--------------------------------------------------------------------------------
/img/montezuma.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/montezuma.gif
--------------------------------------------------------------------------------
/img/pendulum.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/pendulum.gif
--------------------------------------------------------------------------------
/img/starcraft.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/img/starcraft.gif
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | annoy>=1.8.3
2 | Pillow>=9.0.1
3 | matplotlib>=2.0.2
4 | numpy>=1.14.5
5 | pandas>=0.22.0
6 | pygame>=1.9.3
7 | PyOpenGL>=3.1.0
8 | scipy>=0.19.0
9 | scikit-image>=0.13.0
10 | gym==0.12.5
11 | bokeh==1.0.4
12 | kubernetes>=8.0.0b1,<=8.0.1
13 | redis>=2.10.6
14 | minio>=4.0.5
15 | pytest>=3.8.2
16 | psutil>=5.5.0
17 | joblib>=0.17.0
18 |
--------------------------------------------------------------------------------
/rl_coach/__init__.py:
--------------------------------------------------------------------------------
1 | # This gets rid of NumPy FutureWarnings that occur at TF import
2 | import warnings
3 | warnings.filterwarnings('ignore',category=FutureWarning)
4 |
5 | # This gets rid of TF 2.0 related deprecation warnings
6 | import tensorflow as tf
7 | tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
8 |
--------------------------------------------------------------------------------
/rl_coach/agents/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
--------------------------------------------------------------------------------
/rl_coach/architectures/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
--------------------------------------------------------------------------------
/rl_coach/architectures/mxnet_components/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/architectures/mxnet_components/__init__.py
--------------------------------------------------------------------------------
/rl_coach/architectures/mxnet_components/embedders/__init__.py:
--------------------------------------------------------------------------------
1 | from .image_embedder import ImageEmbedder
2 | from .tensor_embedder import TensorEmbedder
3 | from .vector_embedder import VectorEmbedder
4 |
5 | __all__ = ['ImageEmbedder',
6 | 'TensorEmbedder',
7 | 'VectorEmbedder']
8 |
--------------------------------------------------------------------------------
/rl_coach/architectures/mxnet_components/heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .head import Head, HeadLoss
2 | from .q_head import QHead
3 | from .ppo_head import PPOHead
4 | from .ppo_v_head import PPOVHead
5 | from .v_head import VHead
6 |
7 | __all__ = [
8 | 'Head',
9 | 'HeadLoss',
10 | 'QHead',
11 | 'PPOHead',
12 | 'PPOVHead',
13 | 'VHead'
14 | ]
15 |
--------------------------------------------------------------------------------
/rl_coach/architectures/mxnet_components/middlewares/__init__.py:
--------------------------------------------------------------------------------
1 | from .fc_middleware import FCMiddleware
2 | from .lstm_middleware import LSTMMiddleware
3 |
4 | __all__ = ["FCMiddleware", "LSTMMiddleware"]
--------------------------------------------------------------------------------
/rl_coach/architectures/tensorflow_components/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/architectures/tensorflow_components/__init__.py
--------------------------------------------------------------------------------
/rl_coach/architectures/tensorflow_components/embedders/__init__.py:
--------------------------------------------------------------------------------
1 | from .image_embedder import ImageEmbedder
2 | from .vector_embedder import VectorEmbedder
3 | from .tensor_embedder import TensorEmbedder
4 |
5 | __all__ = ['ImageEmbedder', 'VectorEmbedder', 'TensorEmbedder']
6 |
--------------------------------------------------------------------------------
/rl_coach/architectures/tensorflow_components/heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .q_head import QHead
2 | from .categorical_q_head import CategoricalQHead
3 | from .ddpg_actor_head import DDPGActor
4 | from .dnd_q_head import DNDQHead
5 | from .dueling_q_head import DuelingQHead
6 | from .measurements_prediction_head import MeasurementsPredictionHead
7 | from .naf_head import NAFHead
8 | from .policy_head import PolicyHead
9 | from .ppo_head import PPOHead
10 | from .ppo_v_head import PPOVHead
11 | from .quantile_regression_q_head import QuantileRegressionQHead
12 | from .rainbow_q_head import RainbowQHead
13 | from .v_head import VHead
14 | from .acer_policy_head import ACERPolicyHead
15 | from .sac_head import SACPolicyHead
16 | from .sac_q_head import SACQHead
17 | from .classification_head import ClassificationHead
18 | from .cil_head import RegressionHead
19 | from .td3_v_head import TD3VHead
20 | from .ddpg_v_head import DDPGVHead
21 | from .wolpertinger_actor_head import WolpertingerActorHead
22 | from .RND_head import RNDHead
23 |
24 | __all__ = [
25 | 'CategoricalQHead',
26 | 'DDPGActor',
27 | 'DNDQHead',
28 | 'DuelingQHead',
29 | 'MeasurementsPredictionHead',
30 | 'NAFHead',
31 | 'PolicyHead',
32 | 'PPOHead',
33 | 'PPOVHead',
34 | 'QHead',
35 | 'QuantileRegressionQHead',
36 | 'RainbowQHead',
37 | 'VHead',
38 | 'ACERPolicyHead',
39 | 'SACPolicyHead',
40 | 'SACQHead',
41 | 'ClassificationHead',
42 | 'RegressionHead',
43 | 'TD3VHead',
44 | 'DDPGVHead',
45 | 'WolpertingerActorHead',
46 | 'RNDHead'
47 | ]
48 |
--------------------------------------------------------------------------------
/rl_coach/architectures/tensorflow_components/middlewares/__init__.py:
--------------------------------------------------------------------------------
1 | from .fc_middleware import FCMiddleware
2 | from .lstm_middleware import LSTMMiddleware
3 |
4 | __all__ = ["FCMiddleware", "LSTMMiddleware"]
5 |
--------------------------------------------------------------------------------
/rl_coach/dashboard_components/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/dashboard_components/__init__.py
--------------------------------------------------------------------------------
/rl_coach/dashboard_components/boards.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 |
18 | from bokeh.layouts import column
19 | from bokeh.models.widgets import Panel, Tabs
20 | from rl_coach.dashboard_components.experiment_board import experiment_board_layout
21 | from rl_coach.dashboard_components.episodic_board import episodic_board_layout
22 | from rl_coach.dashboard_components.globals import spinner, layouts
23 | from bokeh.models.widgets import Div
24 |
25 | # ---------------- Build Website Layout -------------------
26 |
27 | # title
28 | title = Div(text="""Coach Dashboard""")
29 | center = Div(text="""""")
30 | tab1 = Panel(child=experiment_board_layout, title='experiment board')
31 | # tab2 = Panel(child=episodic_board_layout, title='episodic board')
32 | # tabs = Tabs(tabs=[tab1, tab2])
33 | tabs = Tabs(tabs=[tab1])
34 |
35 | layout = column(title, center, tabs)
36 | layout = column(layout, spinner)
37 |
38 | layouts['boards'] = layout
39 |
--------------------------------------------------------------------------------
/rl_coach/data_stores/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
--------------------------------------------------------------------------------
/rl_coach/environments/README.md:
--------------------------------------------------------------------------------
1 | A custom environment input filter implementation should look like this:
2 |
3 | ```python
4 | from rl_coach.core_types import EnvResponse
5 | from rl_coach.filters.input_filter import InputFilter
6 | from rl_coach.spaces import ObservationSpace, RewardSpace
7 |
8 | class CustomFilter(InputFilter):
9 |     def __init__(self):
10 |         ...
11 |     def _filter(self, env_response: EnvResponse) -> EnvResponse:
12 |         ...
13 |     def _get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace:
14 |         ...
15 |     def _get_filtered_reward_space(self, input_reward_space: RewardSpace) -> RewardSpace:
16 |         ...
17 |     def _validate_input_observation_space(self, input_observation_space: ObservationSpace):
18 |         ...
19 |     def _reset(self):
20 |         ...
21 | ```
22 |
--------------------------------------------------------------------------------
/rl_coach/environments/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 |
--------------------------------------------------------------------------------
/rl_coach/environments/doom/D2_navigation.cfg:
--------------------------------------------------------------------------------
1 | # Lines starting with # are treated as comments (or with whitespaces+#).
2 | # It doesn't matter if you use capital letters or not.
3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
4 |
5 | doom_scenario_path = D2_navigation.wad
6 | doom_map = map01
7 |
8 | # Rewards
9 |
10 | # Each step is good for you!
11 | living_reward = 1
12 | # And death is not!
13 | death_penalty = 0
14 |
15 | # Rendering options
16 | screen_resolution = RES_160X120
17 | screen_format = GRAY8
18 | render_hud = false
19 | render_crosshair = false
20 | render_weapon = false
21 | render_decals = false
22 | render_particles = false
23 | window_visible = false
24 |
25 | # make episodes finish after 2100 actions (tics)
26 | episode_timeout = 2100
27 |
28 | # Available buttons
29 | available_buttons =
30 | {
31 | TURN_LEFT
32 | TURN_RIGHT
33 | MOVE_FORWARD
34 | }
35 |
36 | # Game variables that will be in the state
37 | available_game_variables = { HEALTH }
38 |
39 | mode = PLAYER
40 |
--------------------------------------------------------------------------------
/rl_coach/environments/doom/D2_navigation.wad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/environments/doom/D2_navigation.wad
--------------------------------------------------------------------------------
/rl_coach/environments/doom/D3_battle.cfg:
--------------------------------------------------------------------------------
1 | # Lines starting with # are treated as comments (or with whitespaces+#).
2 | # It doesn't matter if you use capital letters or not.
3 | # It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
4 |
5 | # modify these to point to your vizdoom binary and freedoom2.wad
6 | doom_scenario_path = D3_battle.wad
7 | doom_map = map01
8 |
9 | # Rewards
10 |
11 | living_reward = 0
12 | death_penalty = 0
13 |
14 | # Rendering options
15 | screen_resolution = RES_320X240
16 | screen_format = CRCGCB
17 | render_hud = false
18 | render_crosshair = true
19 | render_weapon = true
20 | render_decals = false
21 | render_particles = false
22 | window_visible = false
23 |
24 | # make episodes finish after 2100 actions (tics)
25 | episode_timeout = 2100
26 |
27 | # Available buttons
28 | available_buttons =
29 | {
30 | MOVE_FORWARD
31 | MOVE_BACKWARD
32 | MOVE_RIGHT
33 | MOVE_LEFT
34 | TURN_LEFT
35 | TURN_RIGHT
36 | ATTACK
37 | SPEED
38 | }
39 |
40 | # Game variables that will be in the state
41 | available_game_variables = {AMMO2 HEALTH USER2}
42 |
43 | mode = PLAYER
44 | doom_skill = 2
45 |
--------------------------------------------------------------------------------
/rl_coach/environments/doom/D3_battle.wad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/environments/doom/D3_battle.wad
--------------------------------------------------------------------------------
/rl_coach/environments/mujoco/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/environments/mujoco/__init__.py
--------------------------------------------------------------------------------
/rl_coach/environments/mujoco/common/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The dm_control Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 |
16 | """Functions to manage the common assets for domains."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import os
23 | from dm_control.utils import resources
24 |
25 | _SUITE_DIR = os.path.dirname(os.path.dirname(__file__))
26 | _FILENAMES = [
27 | "common/materials.xml",
28 | "common/skybox.xml",
29 | "common/visual.xml",
30 | ]
31 |
32 | ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename))
33 | for filename in _FILENAMES}
34 |
35 |
36 | def read_model(model_filename):
37 | """Reads a model XML file and returns its contents as a string."""
38 | return resources.GetResource(os.path.join(_SUITE_DIR, model_filename))
39 |
--------------------------------------------------------------------------------
/rl_coach/environments/mujoco/common/materials.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/environments/mujoco/common/materials.xml
--------------------------------------------------------------------------------
/rl_coach/environments/mujoco/common/skybox.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/environments/mujoco/common/skybox.xml
--------------------------------------------------------------------------------
/rl_coach/environments/mujoco/common/visual.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/environments/mujoco/common/visual.xml
--------------------------------------------------------------------------------
/rl_coach/environments/robosuite/osc_pose.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "OSC_POSE",
3 | "input_max": 1,
4 | "input_min": -1,
5 | "output_max": [0.125, 0.125, 0.125, 0.5, 0.5, 0.5],
6 | "output_min": [-0.125, -0.125, -0.125, -0.5, -0.5, -0.5],
7 | "kp": 150,
8 | "damping_ratio": 1,
9 | "impedance_mode": "fixed",
10 | "kp_limits": [0, 300],
11 | "damping_ratio_limits": [0, 10],
12 | "position_limits": [[-0.22, -0.35, 0.82], [0.22, 0.35, 1.3]],
13 | "orientation_limits": null,
14 | "uncouple_pos_ori": true,
15 | "control_delta": true,
16 | "interpolation": null,
17 | "ramp_ratio": 0.2
18 | }
--------------------------------------------------------------------------------
/rl_coach/environments/toy_problems/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/environments/toy_problems/__init__.py
--------------------------------------------------------------------------------
/rl_coach/exploration_policies/README.md:
--------------------------------------------------------------------------------
1 | # Exploration Policy
2 |
3 | An exploration policy is a module that is responsible for choosing the action according to the action values, the
4 | current phase, its internal state and the specific exploration policy algorithm.
5 |
6 | A custom exploration policy should implement both the exploration policy class and the exploration policy parameters
7 | class, which defines the parameters and the location of the exploration policy module.
8 | The parameters of the exploration policy class should match the parameters in the exploration policy parameters class.
9 |
10 | Exploration policies typically have some control parameter that defines their current exploration state, and
11 | a schedule for this parameter. The schedule can be set using the Schedule class, which is defined in
12 | exploration_policy.py.
13 |
14 | A custom implementation should look as follows:
15 |
16 | ```python
17 | class CustomExplorationParameters(ExplorationParameters):
18 | def __init__(self):
19 | super().__init__()
20 | ...
21 |
22 | @property
23 | def path(self):
24 | return 'module_path:class_name'
25 |
26 |
27 | class CustomExplorationPolicy(ExplorationPolicy):
28 | def __init__(self, action_space: ActionSpace, ...):
29 | super().__init__(action_space)
30 |
31 | def reset(self):
32 | ...
33 |
34 | def get_action(self, action_values: List[ActionType]) -> ActionType:
35 | ...
36 |
37 | def change_phase(self, phase):
38 | ...
39 |
40 | def get_control_param(self):
41 | ...
42 | ```
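43 |
44 | As a usage sketch, the schedule for the control parameter is typically set on the agent's exploration parameters,
45 | as done in many of the presets. The exact attribute name below (`epsilon_schedule`, for the default e-greedy
46 | policy) should be treated as an assumption rather than a reference:
47 |
48 | ```python
49 | from rl_coach.agents.dqn_agent import DQNAgentParameters
50 | from rl_coach.schedules import LinearSchedule
51 |
52 | agent_params = DQNAgentParameters()
53 | # decay epsilon linearly from 1.0 to 0.01 over 10000 steps (illustrative numbers)
54 | agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)
55 | ```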
--------------------------------------------------------------------------------
/rl_coach/exploration_policies/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/exploration_policies/__init__.py
--------------------------------------------------------------------------------
/rl_coach/filters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/filters/__init__.py
--------------------------------------------------------------------------------
/rl_coach/filters/action/__init__.py:
--------------------------------------------------------------------------------
1 | from .attention_discretization import AttentionDiscretization
2 | from .box_discretization import BoxDiscretization
3 | from .box_masking import BoxMasking
4 | from .full_discrete_action_space_map import FullDiscreteActionSpaceMap
5 | from .linear_box_to_box_map import LinearBoxToBoxMap
6 | from .partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
7 | __all__ = [
8 | 'AttentionDiscretization',
9 | 'BoxDiscretization',
10 | 'BoxMasking',
11 | 'FullDiscreteActionSpaceMap',
12 | 'LinearBoxToBoxMap',
13 | 'PartialDiscreteActionSpaceMap'
14 | ]
--------------------------------------------------------------------------------
/rl_coach/filters/action/full_discrete_action_space_map.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
18 | from rl_coach.spaces import ActionSpace, DiscreteActionSpace
19 |
20 |
21 | class FullDiscreteActionSpaceMap(PartialDiscreteActionSpaceMap):
22 | """
23 | Full map of two countable action spaces. This works in a similar way to the
24 | PartialDiscreteActionSpaceMap, but maps the entire source action space into the entire target action space, without
25 | masking any actions.
26 | For example, if there are 10 multiselect actions in the output space, the actions 0-9 will be mapped to those
27 | multiselect actions.
28 | """
29 | def __init__(self):
30 | super().__init__()
31 |
32 | def get_unfiltered_action_space(self, output_action_space: ActionSpace) -> DiscreteActionSpace:
33 | self.target_actions = output_action_space.actions
34 | return super().get_unfiltered_action_space(output_action_space)
35 |
--------------------------------------------------------------------------------
/rl_coach/filters/observation/__init__.py:
--------------------------------------------------------------------------------
1 | from .observation_clipping_filter import ObservationClippingFilter
2 | from .observation_crop_filter import ObservationCropFilter
3 | from .observation_move_axis_filter import ObservationMoveAxisFilter
4 | from .observation_normalization_filter import ObservationNormalizationFilter
5 | from .observation_reduction_by_sub_parts_name_filter import ObservationReductionBySubPartsNameFilter
6 | from .observation_rescale_size_by_factor_filter import ObservationRescaleSizeByFactorFilter
7 | from .observation_rescale_to_size_filter import ObservationRescaleToSizeFilter
8 | from .observation_rgb_to_y_filter import ObservationRGBToYFilter
9 | from .observation_squeeze_filter import ObservationSqueezeFilter
10 | from .observation_stacking_filter import ObservationStackingFilter
11 | from .observation_to_uint8_filter import ObservationToUInt8Filter
12 |
13 | __all__ = [
14 | 'ObservationClippingFilter',
15 | 'ObservationCropFilter',
16 | 'ObservationMoveAxisFilter',
17 | 'ObservationNormalizationFilter',
18 | 'ObservationReductionBySubPartsNameFilter',
19 | 'ObservationRescaleSizeByFactorFilter',
20 | 'ObservationRescaleToSizeFilter',
21 | 'ObservationRGBToYFilter',
22 | 'ObservationSqueezeFilter',
23 | 'ObservationStackingFilter',
24 | 'ObservationToUInt8Filter'
25 | ]
--------------------------------------------------------------------------------
/rl_coach/filters/observation/observation_filter.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | from rl_coach.filters.filter import Filter
18 | from rl_coach.spaces import ObservationSpace
19 |
20 |
21 | class ObservationFilter(Filter):
22 | def __init__(self):
23 | super().__init__()
24 | self.supports_batching = False
25 |
26 | def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace:
27 | """
28 | This function should contain the logic for getting the filtered observation space
29 | :param input_observation_space: the input observation space
30 | :return: the filtered observation space
31 | """
32 | return input_observation_space
33 |
34 | def validate_input_observation_space(self, input_observation_space: ObservationSpace):
35 | """
36 | A function that implements validation of the input observation space
37 | :param input_observation_space: the input observation space
38 | :return: None
39 | """
40 | pass
--------------------------------------------------------------------------------
/rl_coach/filters/reward/__init__.py:
--------------------------------------------------------------------------------
1 | from .reward_rescale_filter import RewardRescaleFilter
2 | from .reward_clipping_filter import RewardClippingFilter
3 | from .reward_normalization_filter import RewardNormalizationFilter
4 | from .reward_ewma_normalization_filter import RewardEwmaNormalizationFilter
5 |
6 | __all__ = [
7 | 'RewardRescaleFilter',
8 | 'RewardClippingFilter',
9 | 'RewardNormalizationFilter',
10 | 'RewardEwmaNormalizationFilter'
11 | ]
--------------------------------------------------------------------------------
/rl_coach/filters/reward/reward_filter.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | from rl_coach.filters.filter import Filter
18 | from rl_coach.spaces import RewardSpace
19 |
20 |
21 | class RewardFilter(Filter):
22 | def __init__(self):
23 | super().__init__()
24 | self.supports_batching = False
25 |
26 | def get_filtered_reward_space(self, input_reward_space: RewardSpace) -> RewardSpace:
27 | """
28 | This function should contain the logic for getting the filtered reward space
29 | :param input_reward_space: the input reward space
30 | :return: the filtered reward space
31 | """
32 | return input_reward_space
--------------------------------------------------------------------------------
/rl_coach/graph_managers/README.md:
--------------------------------------------------------------------------------
1 | # Block Factory
2 |
3 | The block factory is a class which creates a block that fits into a specific RL scheme.
4 | Example RL schemes are: self play, multi agent, HRL, basic RL, etc.
5 | The block factory should create all the components of the block and return the block scheduler.
6 | The block factory will then be used to create different combinations of components.
7 | For example, an HRL factory can be later instantiated with:
8 | * env = Atari Breakout
9 | * master (top hierarchy level) agent = DDPG
10 | * slave (bottom hierarchy level) agent = DQN
11 |
12 | A custom block factory implementation should look as follows:
13 |
14 | ```python
15 | class CustomFactory(BlockFactory):
16 | def __init__(self, custom_params):
17 | super().__init__()
18 |
19 | def _create_block(self, task_index: int, device=None) -> BlockScheduler:
20 | """
21 | Create all the block modules and the block scheduler
22 | :param task_index: the index of the process on which the worker will be run
23 | :return: the initialized block scheduler
24 | """
25 |
26 | # Create env
27 | # Create composite agents
28 | # Create level managers
29 | # Create block scheduler
30 |
31 | return block_scheduler
32 | ```
--------------------------------------------------------------------------------
/rl_coach/graph_managers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/graph_managers/__init__.py
--------------------------------------------------------------------------------
/rl_coach/memories/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
--------------------------------------------------------------------------------
/rl_coach/memories/backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/memories/backend/__init__.py
--------------------------------------------------------------------------------
/rl_coach/memories/backend/memory_impl.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 |
18 | from rl_coach.memories.backend.memory import MemoryBackendParameters
19 | from rl_coach.memories.backend.redis import RedisPubSubBackend, RedisPubSubMemoryBackendParameters
20 |
21 |
22 | def get_memory_backend(params: MemoryBackendParameters):
23 |
24 | backend = None
25 | if type(params) == RedisPubSubMemoryBackendParameters:
26 | backend = RedisPubSubBackend(params)
27 |
28 | return backend
29 |
30 |
31 | def construct_memory_params(json: dict):
32 |
33 | if json['store_type'] == 'redispubsub':
34 | memory_params = RedisPubSubMemoryBackendParameters(
35 | json['redis_address'], json['redis_port'], channel=json.get('channel', ''), run_type=json['run_type']
36 | )
37 | return memory_params
38 |
--------------------------------------------------------------------------------
/rl_coach/memories/episodic/__init__.py:
--------------------------------------------------------------------------------
1 | from .episodic_experience_replay import EpisodicExperienceReplayParameters, EpisodicExperienceReplay
2 | from .episodic_hindsight_experience_replay import EpisodicHindsightExperienceReplayParameters, EpisodicHindsightExperienceReplay
3 | from .episodic_hrl_hindsight_experience_replay import EpisodicHRLHindsightExperienceReplayParameters, EpisodicHRLHindsightExperienceReplay
4 | from .single_episode_buffer import SingleEpisodeBufferParameters, SingleEpisodeBuffer
5 | __all__ = [
6 | 'EpisodicExperienceReplayParameters',
7 | 'EpisodicHindsightExperienceReplayParameters',
8 | 'EpisodicHRLHindsightExperienceReplayParameters',
9 | 'SingleEpisodeBufferParameters',
10 | 'EpisodicExperienceReplay',
11 | 'EpisodicHindsightExperienceReplay',
12 | 'EpisodicHRLHindsightExperienceReplay',
13 | 'SingleEpisodeBuffer'
14 | ]
15 |
--------------------------------------------------------------------------------
/rl_coach/memories/episodic/single_episode_buffer.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay
18 | from rl_coach.memories.memory import MemoryGranularity, MemoryParameters
19 |
20 |
21 | class SingleEpisodeBufferParameters(MemoryParameters):
22 | def __init__(self):
23 | super().__init__()
24 | del self.max_size
25 |
26 | @property
27 | def path(self):
28 | return 'rl_coach.memories.episodic.single_episode_buffer:SingleEpisodeBuffer'
29 |
30 |
31 | class SingleEpisodeBuffer(EpisodicExperienceReplay):
32 | def __init__(self):
33 | super().__init__((MemoryGranularity.Episodes, 1))
34 |
--------------------------------------------------------------------------------
/rl_coach/memories/non_episodic/__init__.py:
--------------------------------------------------------------------------------
1 | from .balanced_experience_replay import BalancedExperienceReplayParameters, BalancedExperienceReplay
2 | from .differentiable_neural_dictionary import QDND
3 | from .experience_replay import ExperienceReplayParameters, ExperienceReplay
4 | from .prioritized_experience_replay import PrioritizedExperienceReplayParameters, PrioritizedExperienceReplay
5 | from .transition_collection import TransitionCollection
6 | __all__ = [
7 | 'BalancedExperienceReplayParameters',
8 | 'BalancedExperienceReplay',
9 | 'QDND',
10 | 'ExperienceReplay',
11 | 'PrioritizedExperienceReplay',
12 | 'TransitionCollection'
13 | ]
14 |
--------------------------------------------------------------------------------
/rl_coach/off_policy_evaluators/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2019 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
--------------------------------------------------------------------------------
/rl_coach/off_policy_evaluators/bandits/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2019 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
--------------------------------------------------------------------------------
/rl_coach/off_policy_evaluators/rl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/off_policy_evaluators/rl/__init__.py
--------------------------------------------------------------------------------
/rl_coach/orchestrators/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
--------------------------------------------------------------------------------
/rl_coach/orchestrators/deploy.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2017 Intel Corporation
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 |
18 |
19 | class DeployParameters(object):
20 |
21 | def __init__(self):
22 | pass
23 |
24 |
25 | class Deploy(object):
26 |
27 | def __init__(self, deploy_parameters):
28 | self.deploy_parameters = deploy_parameters
29 |
30 | def setup(self) -> bool:
31 | pass
32 |
33 | def deploy(self) -> bool:
34 | pass
35 |
--------------------------------------------------------------------------------
/rl_coach/presets/Atari_C51.py:
--------------------------------------------------------------------------------
1 | from rl_coach.agents.categorical_dqn_agent import CategoricalDQNAgentParameters
2 | from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
3 | from rl_coach.environments.environment import SingleLevelSelection
4 | from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, atari_schedule
5 | from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
6 |
7 | #########
8 | # Agent #
9 | #########
10 | agent_params = CategoricalDQNAgentParameters()
11 | agent_params.network_wrappers['main'].learning_rate = 0.00025
12 |
13 | ###############
14 | # Environment #
15 | ###############
16 | env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))
17 |
18 | ########
19 | # Test #
20 | ########
21 | preset_validation_params = PresetValidationParameters()
22 | preset_validation_params.trace_test_levels = ['breakout', 'pong', 'space_invaders']
23 |
24 | graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
25 | schedule_params=atari_schedule, vis_params=VisualizationParameters(),
26 | preset_validation_params=preset_validation_params)
27 |
--------------------------------------------------------------------------------
/rl_coach/presets/Atari_DDQN.py:
--------------------------------------------------------------------------------
1 | from rl_coach.agents.ddqn_agent import DDQNAgentParameters
2 | from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
3 | from rl_coach.environments.environment import SingleLevelSelection
4 | from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, atari_schedule
5 | from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
6 |
7 | #########
8 | # Agent #
9 | #########
10 | agent_params = DDQNAgentParameters()
11 | agent_params.network_wrappers['main'].learning_rate = 0.00025
12 |
13 | ###############
14 | # Environment #
15 | ###############
16 | env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))
17 |
18 | ########
19 | # Test #
20 | ########
21 | preset_validation_params = PresetValidationParameters()
22 | preset_validation_params.trace_test_levels = ['breakout', 'pong', 'space_invaders']
23 |
24 | graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
25 | schedule_params=atari_schedule, vis_params=VisualizationParameters(),
26 | preset_validation_params=preset_validation_params)
--------------------------------------------------------------------------------
/rl_coach/presets/Atari_DDQN_with_PER.py:
--------------------------------------------------------------------------------
1 | from rl_coach.agents.ddqn_agent import DDQNAgentParameters
2 | from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
3 | from rl_coach.environments.environment import SingleLevelSelection
4 | from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, atari_schedule
5 | from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
6 | from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplayParameters
7 | from rl_coach.schedules import LinearSchedule
8 |
9 | #########
10 | # Agent #
11 | #########
12 | agent_params = DDQNAgentParameters()
13 | agent_params.network_wrappers['main'].learning_rate = 0.00025/4
14 | agent_params.memory = PrioritizedExperienceReplayParameters()
15 | agent_params.memory.beta = LinearSchedule(0.4, 1, 12500000) # 12.5M training iterations = 50M steps = 200M frames
16 |
17 | ###############
18 | # Environment #
19 | ###############
20 | env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))
21 |
22 | ########
23 | # Test #
24 | ########
25 | preset_validation_params = PresetValidationParameters()
26 | preset_validation_params.trace_test_levels = ['breakout', 'pong', 'space_invaders']
27 |
28 | graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
29 | schedule_params=atari_schedule, vis_params=VisualizationParameters(),
30 | preset_validation_params=preset_validation_params)
31 |
--------------------------------------------------------------------------------
/rl_coach/presets/Atari_DQN.py:
--------------------------------------------------------------------------------
1 | from rl_coach.agents.dqn_agent import DQNAgentParameters
2 | from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
3 | from rl_coach.environments.environment import SingleLevelSelection
4 | from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, atari_schedule
5 | from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
6 |
7 | #########
8 | # Agent #
9 | #########
10 | agent_params = DQNAgentParameters()
11 | # since we are using Adam instead of RMSProp, we adjust the learning rate as well
12 | agent_params.network_wrappers['main'].learning_rate = 0.0001
13 |
14 | ###############
15 | # Environment #
16 | ###############
17 | env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))
18 |
19 | ########
20 | # Test #
21 | ########
22 | preset_validation_params = PresetValidationParameters()
23 | preset_validation_params.trace_test_levels = ['breakout', 'pong', 'space_invaders']
24 |
25 | graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
26 | schedule_params=atari_schedule, vis_params=VisualizationParameters(),
27 | preset_validation_params=preset_validation_params)
28 |
--------------------------------------------------------------------------------
/rl_coach/presets/Atari_DQN_with_PER.py:
--------------------------------------------------------------------------------
1 | from rl_coach.agents.dqn_agent import DQNAgentParameters
2 | from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
3 | from rl_coach.environments.environment import SingleLevelSelection
4 | from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, atari_schedule
5 | from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
6 | from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplayParameters
7 | from rl_coach.schedules import LinearSchedule
8 |
9 |
10 | #########
11 | # Agent #
12 | #########
13 | agent_params = DQNAgentParameters()
14 | agent_params.network_wrappers['main'].learning_rate = 0.00025
15 | agent_params.memory = PrioritizedExperienceReplayParameters()
16 | agent_params.memory.beta = LinearSchedule(0.4, 1, 12500000) # 12.5M training iterations = 50M steps = 200M frames
17 |
18 | ###############
19 | # Environment #
20 | ###############
21 | env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))
22 |
23 | ########
24 | # Test #
25 | ########
26 | preset_validation_params = PresetValidationParameters()
27 | preset_validation_params.trace_test_levels = ['breakout', 'pong', 'space_invaders']
28 |
29 | graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
30 | schedule_params=atari_schedule, vis_params=VisualizationParameters(),
31 | preset_validation_params=preset_validation_params)
32 |
--------------------------------------------------------------------------------
/rl_coach/presets/Atari_QR_DQN.py:
--------------------------------------------------------------------------------
1 | from rl_coach.agents.qr_dqn_agent import QuantileRegressionDQNAgentParameters
2 | from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
3 | from rl_coach.environments.environment import SingleLevelSelection
4 | from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, atari_schedule
5 | from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
6 |
7 | #########
8 | # Agent #
9 | #########
10 | agent_params = QuantileRegressionDQNAgentParameters()
11 | agent_params.network_wrappers['main'].learning_rate = 0.00005 # called alpha in the paper
12 | agent_params.algorithm.huber_loss_interval = 1 # k = 0 for strict quantile loss, k = 1 for Huber quantile loss
13 |
14 | ###############
15 | # Environment #
16 | ###############
17 | env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))
18 |
19 | ########
20 | # Test #
21 | ########
22 | preset_validation_params = PresetValidationParameters()
23 | preset_validation_params.trace_test_levels = ['breakout', 'pong', 'space_invaders']
24 |
25 | graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
26 | schedule_params=atari_schedule, vis_params=VisualizationParameters(),
27 | preset_validation_params=preset_validation_params)
28 |
--------------------------------------------------------------------------------
/rl_coach/presets/Atari_UCB_with_Q_Ensembles.py:
--------------------------------------------------------------------------------
1 | from rl_coach.agents.bootstrapped_dqn_agent import BootstrappedDQNAgentParameters
2 | from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
3 | from rl_coach.environments.environment import SingleLevelSelection
4 | from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, atari_schedule
5 | from rl_coach.exploration_policies.ucb import UCBParameters
6 | from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
7 |
8 | #########
9 | # Agent #
10 | #########
11 | agent_params = BootstrappedDQNAgentParameters()
12 | agent_params.network_wrappers['main'].learning_rate = 0.00025
13 | agent_params.exploration = UCBParameters()
14 |
15 | ###############
16 | # Environment #
17 | ###############
18 | env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))
19 |
20 | ########
21 | # Test #
22 | ########
23 | preset_validation_params = PresetValidationParameters()
24 | preset_validation_params.trace_test_levels = ['breakout', 'pong', 'space_invaders']
25 |
26 | graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
27 | schedule_params=atari_schedule, vis_params=VisualizationParameters(),
28 | preset_validation_params=preset_validation_params)
29 |
--------------------------------------------------------------------------------
/rl_coach/presets/README.md:
--------------------------------------------------------------------------------
1 | # Defining Presets
2 |
3 | In Coach, we use a Preset mechanism to define reproducible experiments.
4 | A Preset defines all the parameters of an experiment in a single file, and can be executed from the command
5 | line using the file name.
6 | Presets can be very simple, relying on the default parameters of the algorithm and the environment.
7 | They can also be fully explicit and define all the parameters, so that no logic is hidden.
8 | The outcome of a preset is a GraphManager.
9 |
10 |
11 | Let's start with the simplest preset possible.
12 | We will define a preset for training the CartPole environment using Clipped PPO.
13 | At minimum, each preset needs to define three things: the agent, the environment and a schedule.
14 |
15 | ```
16 | from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
17 | from rl_coach.environments.gym_environment import GymVectorEnvironment
18 | from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
19 | from rl_coach.graph_managers.graph_manager import SimpleSchedule
20 |
21 | graph_manager = BasicRLGraphManager(
22 | agent_params=ClippedPPOAgentParameters(),
23 | env_params=GymVectorEnvironment(level='CartPole-v0'),
24 | schedule_params=SimpleSchedule()
25 | )
26 | ```
27 |
28 | Most presets in Coach are much more explicit than this. The motivation is to be as transparent as
29 | possible about every change made relative to the baseline parameters defined in the algorithm's paper.
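
A sketch of what such an explicit preset might look like is shown below. It is illustrative only and not taken from the repository: the step counts and learning rate are placeholder assumptions, and the schedule is spelled out with `ScheduleParameters` instead of the ready-made `SimpleSchedule`/`atari_schedule` objects used elsewhere in this listing.

```
from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters

####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000)                    # placeholder value
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.0003               # placeholder value

###############
# Environment #
###############
env_params = GymVectorEnvironment(level='CartPole-v0')

########
# Test #
########
preset_validation_params = PresetValidationParameters()

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
```

Once saved under `rl_coach/presets/`, a preset like this is typically launched by name from the command line, e.g. `coach -p <PresetName>`.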
--------------------------------------------------------------------------------
/rl_coach/presets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/presets/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/agents/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/agents/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/agents/test_agent_external_communication.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | from rl_coach.base_parameters import TaskParameters, Frameworks
5 |
6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
7 | import tensorflow as tf
8 | from tensorflow import logging
9 | import pytest
10 | logging.set_verbosity(logging.INFO)
11 |
12 |
13 | @pytest.mark.unit_test
14 | def test_get_QActionStateValue_predictions():
15 | tf.reset_default_graph()
16 | from rl_coach.presets.CartPole_DQN import graph_manager as cartpole_dqn_graph_manager
17 | assert cartpole_dqn_graph_manager
18 | cartpole_dqn_graph_manager.create_graph(task_parameters=
19 | TaskParameters(framework_type=Frameworks.tensorflow,
20 | experiment_path="./experiments/test"))
21 | cartpole_dqn_graph_manager.improve_steps.num_steps = 1
22 | cartpole_dqn_graph_manager.steps_between_evaluation_periods.num_steps = 5
23 |
24 | # graph_manager.improve()
25 | #
26 | # agent = graph_manager.level_managers[0].composite_agents['simple_rl_agent'].agents['simple_rl_agent/agent']
27 | # some_state = agent.memory.sample(1)[0].state
28 | # cartpole_dqn_predictions = agent.get_predictions(states=some_state, prediction_type=QActionStateValue)
29 | # assert cartpole_dqn_predictions.shape == (1, 2)
30 |
31 |
32 | if __name__ == '__main__':
33 | test_get_QActionStateValue_predictions()
34 |
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/architectures/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/mxnet_components/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/architectures/mxnet_components/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/mxnet_components/embedders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/architectures/mxnet_components/embedders/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/mxnet_components/embedders/test_image_embedder.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import os
3 | import pytest
4 | import sys
5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
6 |
7 |
8 | from rl_coach.base_parameters import EmbedderScheme
9 | from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
10 | from rl_coach.architectures.mxnet_components.embedders.image_embedder import ImageEmbedder
11 |
12 |
13 | @pytest.mark.unit_test
14 | def test_image_embedder():
15 | params = InputEmbedderParameters(scheme=EmbedderScheme.Medium)
16 | emb = ImageEmbedder(params=params)
17 | emb.initialize()
18 | # input is NHWC, and not MXNet default NCHW
19 | input_data = mx.nd.random.uniform(low=0, high=1, shape=(10, 244, 244, 3))
20 | output = emb(input_data)
21 |     assert len(output.shape) == 2  # since the last block of the embedder is a flatten
22 | assert output.shape[0] == 10 # since batch_size is 10
23 |
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/mxnet_components/embedders/test_vector_embedder.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import os
3 | import pytest
4 | import sys
5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
6 |
7 |
8 | from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
9 | from rl_coach.architectures.mxnet_components.embedders.vector_embedder import VectorEmbedder
10 | from rl_coach.base_parameters import EmbedderScheme
11 |
12 |
13 | @pytest.mark.unit_test
14 | def test_vector_embedder():
15 | params = InputEmbedderParameters(scheme=EmbedderScheme.Medium)
16 | emb = VectorEmbedder(params=params)
17 | emb.initialize()
18 | input_data = mx.nd.random.uniform(low=0, high=255, shape=(10, 100))
19 | output = emb(input_data)
20 |     assert len(output.shape) == 2  # since the last block of the embedder is a flatten
21 | assert output.shape[0] == 10 # since batch_size is 10
22 | assert output.shape[1] == 256 # since last dense layer has 256 units
23 |
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/mxnet_components/heads/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/architectures/mxnet_components/heads/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/mxnet_components/heads/test_head.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import numpy as np
3 | import os
4 | import pytest
5 | import sys
6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
7 |
8 |
9 | from rl_coach.architectures.mxnet_components.heads.head import NormalizedRSSInitializer
10 |
11 |
12 | @pytest.mark.unit_test
13 | def test_normalized_rss_initializer():
14 | target_rss = 0.5
15 | units = 10
16 | dense = mx.gluon.nn.Dense(units=units, weight_initializer=NormalizedRSSInitializer(target_rss))
17 | dense.initialize()
18 |
19 | input_data = mx.random.uniform(shape=(25, 5))
20 | output_data = dense(input_data)
21 |
22 | weights = dense.weight.data()
23 | assert weights.shape == (10, 5)
24 | rss = weights.square().sum(axis=1).sqrt()
25 | np.testing.assert_almost_equal(rss.asnumpy(), np.tile(target_rss, units))
26 |
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/mxnet_components/middlewares/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/architectures/mxnet_components/middlewares/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/mxnet_components/middlewares/test_fc_middleware.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import os
3 | import pytest
4 | import sys
5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
6 |
7 |
8 | from rl_coach.base_parameters import MiddlewareScheme
9 | from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
10 | from rl_coach.architectures.mxnet_components.middlewares.fc_middleware import FCMiddleware
11 |
12 |
13 | @pytest.mark.unit_test
14 | def test_fc_middleware():
15 | params = FCMiddlewareParameters(scheme=MiddlewareScheme.Medium)
16 | mid = FCMiddleware(params=params)
17 | mid.initialize()
18 | embedded_data = mx.nd.random.uniform(low=0, high=1, shape=(10, 100))
19 | output = mid(embedded_data)
20 |     assert output.ndim == 2  # output is (batch, features); the FC middleware does not change the rank
21 | assert output.shape[0] == 10 # since batch_size is 10
22 | assert output.shape[1] == 512 # since last layer of middleware (middle scheme) had 512 units
23 |
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/mxnet_components/middlewares/test_lstm_middleware.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import os
3 | import pytest
4 | import sys
5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
6 |
7 |
8 | from rl_coach.base_parameters import MiddlewareScheme
9 | from rl_coach.architectures.middleware_parameters import LSTMMiddlewareParameters
10 | from rl_coach.architectures.mxnet_components.middlewares.lstm_middleware import LSTMMiddleware
11 |
12 |
13 | @pytest.mark.unit_test
14 | def test_lstm_middleware():
15 | params = LSTMMiddlewareParameters(number_of_lstm_cells=25, scheme=MiddlewareScheme.Medium)
16 | mid = LSTMMiddleware(params=params)
17 | mid.initialize()
18 | # NTC
19 | embedded_data = mx.nd.random.uniform(low=0, high=1, shape=(10, 15, 20))
20 | # NTC -> TNC
21 | output = mid(embedded_data)
22 |     assert output.ndim == 3  # rank-3 output (time, batch, channels); the LSTM middleware does not flatten
23 | assert output.shape[0] == 15 # since t is 15
24 | assert output.shape[1] == 10 # since batch_size is 10
25 | assert output.shape[2] == 25 # since number_of_lstm_cells is 25
26 |
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/tensorflow_components/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/architectures/tensorflow_components/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/tensorflow_components/embedders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/architectures/tensorflow_components/embedders/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/architectures/tensorflow_components/embedders/test_identity_embedder.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | from rl_coach.base_parameters import EmbedderScheme
5 |
6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
7 |
8 | import pytest
9 | import numpy as np
10 | from rl_coach.architectures.tensorflow_components.embedders.vector_embedder import VectorEmbedder
11 | import tensorflow as tf
12 | from tensorflow import logging
13 |
14 | logging.set_verbosity(logging.INFO)
15 |
16 | @pytest.fixture
17 | def reset():
18 | tf.reset_default_graph()
19 |
20 |
21 | @pytest.mark.unit_test
22 | def test_embedder(reset):
23 | embedder = VectorEmbedder(np.array([10, 10]), name="test", scheme=EmbedderScheme.Empty)
24 |
25 |     # make sure the ops were not created yet
26 | assert len(tf.get_default_graph().get_operations()) == 0
27 |
28 | # call the embedder
29 | input_ph, output_ph = embedder()
30 |
31 | # make sure that now the ops were created
32 | assert len(tf.get_default_graph().get_operations()) > 0
33 |
34 | # try feeding a batch of one example # TODO: consider auto converting to batch
35 | input = np.random.rand(1, 10, 10)
36 | sess = tf.Session()
37 | output = sess.run(embedder.output, {embedder.input: input})
38 | assert output.shape == (1, 100) # should have flattened the input
39 |
40 | # now make sure the returned placeholders behave the same
41 | output = sess.run(output_ph, {input_ph: input})
42 | assert output.shape == (1, 100) # should have flattened the input
43 |
44 | # make sure the naming is correct
45 | assert embedder.get_name() == "test"
46 |
--------------------------------------------------------------------------------
/rl_coach/tests/environments/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/environments/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/exploration_policies/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/exploration_policies/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/exploration_policies/test_additive_noise.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
4 |
5 | import pytest
6 |
7 | from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
8 | from rl_coach.exploration_policies.additive_noise import AdditiveNoise
9 | from rl_coach.schedules import LinearSchedule
10 | import numpy as np
11 |
12 |
13 | @pytest.mark.unit_test
14 | def test_init():
15 | # discrete control
16 | action_space = DiscreteActionSpace(3)
17 | noise_schedule = LinearSchedule(1.0, 1.0, 1000)
18 |
19 | # additive noise requires a bounded range for the actions
20 | action_space = BoxActionSpace(np.array([10]))
21 | with pytest.raises(ValueError):
22 | policy = AdditiveNoise(action_space, noise_schedule, 0)
23 |
24 |
25 | @pytest.mark.unit_test
26 | def test_get_action():
27 | # make sure noise is in range
28 | action_space = BoxActionSpace(np.array([10]), -1, 1)
29 | noise_schedule = LinearSchedule(1.0, 1.0, 1000)
30 | policy = AdditiveNoise(action_space, noise_schedule, 0)
31 |
32 | # the action range is 2, so there is a ~0.1% chance that the noise will be larger than 3*std=3*2=6
33 | for i in range(1000):
34 | action = policy.get_action(np.zeros([10]))
35 | assert np.all(action < 10)
36 | # make sure there is no clipping of the action since it should be the environment that clips actions
37 | assert np.all(action != 1.0)
38 | assert np.all(action != -1.0)
39 | # make sure that each action element has a different value
40 | assert np.all(action[0] != action[1:])
41 |
--------------------------------------------------------------------------------
/rl_coach/tests/exploration_policies/test_greedy.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
4 |
5 | import pytest
6 |
7 | from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace
8 | from rl_coach.exploration_policies.greedy import Greedy
9 | import numpy as np
10 |
11 |
12 | @pytest.mark.unit_test
13 | def test_get_action():
14 | # discrete control
15 | action_space = DiscreteActionSpace(3)
16 | policy = Greedy(action_space)
17 |
18 | best_action, _ = policy.get_action(np.array([10, 20, 30]))
19 | assert best_action == 2
20 |
21 | # continuous control
22 | action_space = BoxActionSpace(np.array([10]))
23 | policy = Greedy(action_space)
24 |
25 | best_action = policy.get_action(np.array([1, 1, 1]))
26 | assert np.all(best_action == np.array([1, 1, 1]))
27 |
28 |
29 | @pytest.mark.unit_test
30 | def test_get_control_param():
31 | action_space = DiscreteActionSpace(3)
32 | policy = Greedy(action_space)
33 | assert policy.get_control_param() == 0
34 |
35 |
--------------------------------------------------------------------------------
/rl_coach/tests/filters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/filters/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/filters/action/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/filters/action/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/filters/action/test_box_masking.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
4 |
5 | import pytest
6 | from rl_coach.filters.action.box_masking import BoxMasking
7 | from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
8 | import numpy as np
9 |
10 |
11 | @pytest.mark.unit_test
12 | def test_filter():
13 | filter = BoxMasking(10, 20)
14 |
15 | # passing an output space that is wrong
16 | with pytest.raises(ValueError):
17 | filter.validate_output_action_space(DiscreteActionSpace(10))
18 |
19 | # 1 dimensional box
20 | output_space = BoxActionSpace(1, 5, 30)
21 | input_space = filter.get_unfiltered_action_space(output_space)
22 |
23 | action = np.array([2])
24 | result = filter.filter(action)
25 | assert result == np.array([12])
26 | assert output_space.contains(result)
27 |
28 |
--------------------------------------------------------------------------------
/rl_coach/tests/filters/action/test_linear_box_to_box_map.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
4 |
5 | import pytest
6 | from rl_coach.filters.action.linear_box_to_box_map import LinearBoxToBoxMap
7 | from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace
8 | import numpy as np
9 |
10 |
11 | @pytest.mark.unit_test
12 | def test_filter():
13 | filter = LinearBoxToBoxMap(10, 20)
14 |
15 | # passing an output space that is wrong
16 | with pytest.raises(ValueError):
17 | filter.validate_output_action_space(DiscreteActionSpace(10))
18 |
19 | # 1 dimensional box
20 | output_space = BoxActionSpace(1, 5, 35)
21 | input_space = filter.get_unfiltered_action_space(output_space)
22 |
23 | action = np.array([2])
24 |
25 | action = np.array([12])
26 | result = filter.filter(action)
27 | assert result == np.array([11])
28 | assert output_space.contains(result)
29 |
30 |
--------------------------------------------------------------------------------
/rl_coach/tests/filters/observation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/filters/observation/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/filters/reward/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/filters/reward/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/graph_managers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/graph_managers/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/memories/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/memories/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/presets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/presets/__init__.py
--------------------------------------------------------------------------------
/rl_coach/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | # content of pytest.ini
2 | [pytest]
3 | markers =
4 | unit_test: short test that checks that a module is acting correctly
5 | integration_test: long test that checks that the complete framework is running correctly
6 | filterwarnings =
7 | ignore::DeprecationWarning
8 | norecursedirs =
9 | *mxnet*
10 |
--------------------------------------------------------------------------------
/rl_coach/tests/test_saver.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from rl_coach.saver import Saver, SaverCollection
4 |
5 |
6 | @pytest.mark.unit_test
7 | def test_checkpoint_collection():
8 | class SaverTest(Saver):
9 | def __init__(self, path):
10 | self._path = path
11 | self._count = 1
12 |
13 | @property
14 | def path(self):
15 | return self._path
16 |
17 | def merge(self, other: 'Saver'):
18 | assert isinstance(other, SaverTest)
19 | assert self.path == other.path
20 | self._count += other._count
21 |
22 | # test add
23 | savers = SaverCollection(SaverTest('123'))
24 | savers.add(SaverTest('123'))
25 | savers.add(SaverTest('456'))
26 |
27 | def check_collection(mul):
28 | paths = ['123', '456']
29 | for c in savers:
30 | paths.remove(c.path)
31 | if c.path == '123':
32 | assert c._count == 2 * mul
33 | elif c.path == '456':
34 | assert c._count == 1 * mul
35 | else:
36 | assert False, "invalid path"
37 |
38 | check_collection(1)
39 |
40 | # test update
41 | savers.update(savers)
42 | check_collection(2)
43 |
--------------------------------------------------------------------------------
/rl_coach/tests/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/tests/utils/__init__.py
--------------------------------------------------------------------------------
/rl_coach/utilities/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/rl_coach/utilities/__init__.py
--------------------------------------------------------------------------------
/tutorials/Resources/exploration.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | from typing import List
4 | from rl_coach.core_types import ActionType
5 | from rl_coach.spaces import ActionSpace
6 | from rl_coach.exploration_policies.exploration_policy import ExplorationPolicy, ExplorationParameters
7 |
8 |
9 | class MyExplorationPolicy(ExplorationPolicy):
10 | """
11 | An exploration policy takes the predicted actions or action values from the agent, and selects the action to
12 | actually apply to the environment using some predefined algorithm.
13 | """
14 | def __init__(self, action_space: ActionSpace):
15 | #self.phase = RunPhase.HEATUP
16 | self.action_space = action_space
17 | super().__init__(action_space)
18 |
19 | def get_action(self, action_values: List[ActionType]) -> ActionType:
20 | if (np.random.rand() < 0.5):
21 | chosen_action = self.action_space.sample()
22 | else:
23 | chosen_action = np.argmax(action_values)
24 | probabilities = np.zeros(len(self.action_space.actions))
25 | probabilities[chosen_action] = 1
26 | return chosen_action, probabilities
27 |
28 | def get_control_param(self):
29 | return 0
30 |
31 |
32 |
33 | class MyExplorationParameters(ExplorationParameters):
34 | def __init__(self):
35 | super().__init__()
36 |
37 | @property
38 | def path(self):
39 | return 'exploration:MyExplorationPolicy'
40 |
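
As a usage sketch (not part of the tutorial file): assuming this module is importable as `exploration`, so that the `'exploration:MyExplorationPolicy'` path above resolves, the custom policy is attached to an agent exactly like the built-in ones (compare `agent_params.exploration = UCBParameters()` in the presets above).

```
# Illustrative wiring only; assumes this tutorial file is on the Python path as 'exploration'.
from rl_coach.agents.dqn_agent import DQNAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

from exploration import MyExplorationParameters

agent_params = DQNAgentParameters()
agent_params.exploration = MyExplorationParameters()  # replaces the agent's default exploration policy

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=GymVectorEnvironment(level='CartPole-v0'),
    schedule_params=SimpleSchedule()
)
```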
--------------------------------------------------------------------------------
/tutorials/Resources/img/dr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/tutorials/Resources/img/dr.png
--------------------------------------------------------------------------------
/tutorials/Resources/img/model_selection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/tutorials/Resources/img/model_selection.png
--------------------------------------------------------------------------------
/tutorials/Resources/img/wis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IntelLabs/coach/2c60cb5acd8cd3c9c381a5066c208e69fc273c7b/tutorials/Resources/img/wis.png
--------------------------------------------------------------------------------
/tutorials/python_invocation_example.py:
--------------------------------------------------------------------------------
1 | from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
2 | from rl_coach.core_types import EnvironmentSteps
3 | from rl_coach.environments.gym_environment import GymVectorEnvironment
4 | from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
5 | from rl_coach.graph_managers.graph_manager import SimpleSchedule
6 |
7 | graph_manager = BasicRLGraphManager(
8 | agent_params=ClippedPPOAgentParameters(),
9 | env_params=GymVectorEnvironment(level='CartPole-v0'),
10 | schedule_params=SimpleSchedule()
11 | )
12 |
13 | graph_manager.heatup(EnvironmentSteps(100))
14 | graph_manager.train_and_act(EnvironmentSteps(100))
--------------------------------------------------------------------------------