├── .flake8 ├── .gitignore ├── .pre-commit-config.yaml ├── README.md ├── commitlint.config.js ├── discounting_chain.ipynb ├── discounting_chain ├── __init__.py ├── a2c.py ├── base.py ├── bmg_a2c.py ├── data │ ├── discounting_chain_appendix_histories_array.pickle │ └── discounting_chain_histories_array.pickle ├── data_generation │ ├── __init__.py │ ├── n_step.py │ ├── n_step_data_generator.py │ └── n_step_utils.py ├── envs │ ├── __init__.py │ ├── env_utils.py │ ├── gymnax_dc_wrapper.py │ └── gymnax_wrapper.py ├── list_logger.py ├── meta_a2c.py ├── nets.py ├── nets_split.py └── train_utils.py ├── discounting_chain_train.ipynb ├── mypy.ini ├── plots ├── discounting_chain │ ├── dc_chain_discount_factor.png │ ├── dc_chain_discount_factor_appendix.png │ ├── dc_chain_outer_loss_advantage.png │ ├── dc_chain_outer_loss_advantage_appendix.png │ ├── dc_chain_return.png │ └── dc_chain_return_appendix.png └── snake │ ├── snake_discount_factor.png │ ├── snake_discount_factor_appendix.png │ ├── snake_outer_loss_advantage.png │ ├── snake_outer_loss_advantage_appendix.png │ ├── snake_return.png │ └── snake_return_appendix.png ├── requirements-dev.txt ├── requirements.txt ├── snake.ipynb ├── snake ├── __init__.py ├── agent │ ├── __init__.py │ ├── a2c.py │ ├── actor_critic_agent.py │ ├── gae.py │ └── meta_a2c.py ├── configs │ ├── agent │ │ ├── a2c.yaml │ │ ├── bootstrap.yaml │ │ └── mgrl.yaml │ └── config.yaml ├── data │ ├── a2c_gamma │ │ ├── MET-2150__train_mean_gamma.csv │ │ ├── MET-2156__train_mean_gamma.csv │ │ ├── MET-2158__train_mean_gamma.csv │ │ ├── MET-2162__train_mean_gamma.csv │ │ ├── MET-2165__train_mean_gamma.csv │ │ ├── MET-2168__train_mean_gamma.csv │ │ ├── MET-2171__train_mean_gamma.csv │ │ ├── MET-2175__train_mean_gamma.csv │ │ ├── MET-2177__train_mean_gamma.csv │ │ └── MET-2180__train_mean_gamma.csv │ ├── a2c_return │ │ ├── MET-2150__eval_episode_reward_determinist_policy.csv │ │ ├── MET-2156__eval_episode_reward_determinist_policy.csv │ │ ├── MET-2158__eval_episode_reward_determinist_policy.csv │ │ ├── MET-2162__eval_episode_reward_determinist_policy.csv │ │ ├── MET-2165__eval_episode_reward_determinist_policy.csv │ │ ├── MET-2168__eval_episode_reward_determinist_policy.csv │ │ ├── MET-2171__eval_episode_reward_determinist_policy.csv │ │ ├── MET-2175__eval_episode_reward_determinist_policy.csv │ │ ├── MET-2177__eval_episode_reward_determinist_policy.csv │ │ └── MET-2180__eval_episode_reward_determinist_policy.csv │ ├── appendix │ │ ├── bias │ │ │ ├── bias_bootstrap_no_outer_no_norm.csv │ │ │ ├── bias_bootstrap_no_outer_norm.csv │ │ │ ├── bias_bootstrap_outer_no_norm.csv │ │ │ ├── bias_bootstrap_outer_norm.csv │ │ │ ├── bias_mgrl_no_outer_no_norm.csv │ │ │ ├── bias_mgrl_no_outer_norm.csv │ │ │ ├── bias_mgrl_outer_no_norm.csv │ │ │ └── bias_mgrl_outer_norm.csv │ │ ├── gamma │ │ │ ├── gamma_a2c_no_norm.csv │ │ │ ├── gamma_a2c_norm.csv │ │ │ ├── gamma_bootstrap_no_outer_no_norm.csv │ │ │ ├── gamma_bootstrap_no_outer_norm.csv │ │ │ ├── gamma_bootstrap_outer_no_norm.csv │ │ │ ├── gamma_bootstrap_outer_norm.csv │ │ │ ├── gamma_mgrl_no_outer_no_norm.csv │ │ │ ├── gamma_mgrl_no_outer_norm.csv │ │ │ ├── gamma_mgrl_outer_no_norm.csv │ │ │ └── gamma_mgrl_outer_norm.csv │ │ └── return │ │ │ ├── return_a2c_no_norm.csv │ │ │ ├── return_a2c_norm.csv │ │ │ ├── return_bootstrap_no_outer_no_norm.csv │ │ │ ├── return_bootstrap_no_outer_norm.csv │ │ │ ├── return_bootstrap_outer_no_norm.csv │ │ │ ├── return_bootstrap_outer_norm.csv │ │ │ ├── return_mgrl_no_outer_no_norm.csv │ │ │ ├── return_mgrl_no_outer_norm.csv │ │ │ ├── return_mgrl_outer_no_norm.csv │ │ │ └── return_mgrl_outer_norm.csv │ ├── bootstrap_bias │ │ ├── MET-2154__train_mean_advantage_outer_loss.csv │ │ ├── MET-2160__train_mean_advantage_outer_loss.csv │ │ ├── MET-2166__train_mean_advantage_outer_loss.csv │ │ ├── MET-2172__train_mean_advantage_outer_loss.csv │ │ ├── MET-2178__train_mean_advantage_outer_loss.csv │ │ ├── MET-2183__train_mean_advantage_outer_loss.csv │ │ ├── MET-2188__train_mean_advantage_outer_loss.csv │ │ ├── MET-2192__train_mean_advantage_outer_loss.csv │ │ ├── MET-2196__train_mean_advantage_outer_loss.csv │ │ └── MET-2199__train_mean_advantage_outer_loss.csv │ ├── bootstrap_gamma │ │ ├── MET-2154__train_mean_gamma.csv │ │ ├── MET-2160__train_mean_gamma.csv │ │ ├── MET-2166__train_mean_gamma.csv │ │ ├── MET-2172__train_mean_gamma.csv │ │ ├── MET-2178__train_mean_gamma.csv │ │ ├── MET-2183__train_mean_gamma.csv │ │ ├── MET-2188__train_mean_gamma.csv │ │ ├── MET-2192__train_mean_gamma.csv │ │ ├── MET-2196__train_mean_gamma.csv │ │ └── MET-2199__train_mean_gamma.csv │ ├── bootstrap_outer_critic_bias │ │ ├── MET-2153__train_mean_advantage_outer_loss.csv │ │ ├── MET-2161__train_mean_advantage_outer_loss.csv │ │ ├── MET-2167__train_mean_advantage_outer_loss.csv │ │ ├── MET-2173__train_mean_advantage_outer_loss.csv │ │ ├── MET-2181__train_mean_advantage_outer_loss.csv │ │ ├── MET-2185__train_mean_advantage_outer_loss.csv │ │ ├── MET-2190__train_mean_advantage_outer_loss.csv │ │ ├── MET-2195__train_mean_advantage_outer_loss.csv │ │ ├── MET-2198__train_mean_advantage_outer_loss.csv │ │ └── MET-2200__train_mean_advantage_outer_loss.csv │ ├── bootstrap_outer_critic_gamma │ │ ├── MET-2153__train_mean_gamma.csv │ │ ├── MET-2161__train_mean_gamma.csv │ │ ├── MET-2167__train_mean_gamma.csv │ │ ├── MET-2173__train_mean_gamma.csv │ │ ├── MET-2181__train_mean_gamma.csv │ │ ├── MET-2185__train_mean_gamma.csv │ │ ├── MET-2190__train_mean_gamma.csv │ │ ├── MET-2195__train_mean_gamma.csv │ │ ├── MET-2198__train_mean_gamma.csv │ │ └── MET-2200__train_mean_gamma.csv │ ├── bootstrap_outer_critic_return │ │ ├── MET-2153__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2161__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2167__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2173__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2181__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2185__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2190__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2195__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2198__eval_episode_reward_stochastic_policy.csv │ │ └── MET-2200__eval_episode_reward_stochastic_policy.csv │ ├── bootstrap_return │ │ ├── MET-2154__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2160__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2166__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2172__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2178__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2183__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2188__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2192__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2196__eval_episode_reward_stochastic_policy.csv │ │ └── MET-2199__eval_episode_reward_stochastic_policy.csv │ ├── mgrl_bias │ │ ├── MET-2151__train_mean_advantage_outer_loss.csv │ │ ├── MET-2157__train_mean_advantage_outer_loss.csv │ │ ├── MET-2163__train_mean_advantage_outer_loss.csv │ │ ├── MET-2169__train_mean_advantage_outer_loss.csv │ │ ├── MET-2174__train_mean_advantage_outer_loss.csv │ │ ├── MET-2179__train_mean_advantage_outer_loss.csv │ │ ├── MET-2184__train_mean_advantage_outer_loss.csv │ │ ├── MET-2187__train_mean_advantage_outer_loss.csv │ │ ├── MET-2191__train_mean_advantage_outer_loss.csv │ │ └── MET-2194__train_mean_advantage_outer_loss.csv │ ├── mgrl_gamma │ │ ├── MET-2151__train_mean_gamma.csv │ │ ├── MET-2157__train_mean_gamma.csv │ │ ├── MET-2163__train_mean_gamma.csv │ │ ├── MET-2169__train_mean_gamma.csv │ │ ├── MET-2174__train_mean_gamma.csv │ │ ├── MET-2179__train_mean_gamma.csv │ │ ├── MET-2184__train_mean_gamma.csv │ │ ├── MET-2187__train_mean_gamma.csv │ │ ├── MET-2191__train_mean_gamma.csv │ │ └── MET-2194__train_mean_gamma.csv │ ├── mgrl_outer_critic_bias │ │ ├── MET-2152__train_mean_advantage_outer_loss.csv │ │ ├── MET-2159__train_mean_advantage_outer_loss.csv │ │ ├── MET-2164__train_mean_advantage_outer_loss.csv │ │ ├── MET-2170__train_mean_advantage_outer_loss.csv │ │ ├── MET-2176__train_mean_advantage_outer_loss.csv │ │ ├── MET-2182__train_mean_advantage_outer_loss.csv │ │ ├── MET-2186__train_mean_advantage_outer_loss.csv │ │ ├── MET-2189__train_mean_advantage_outer_loss.csv │ │ ├── MET-2193__train_mean_advantage_outer_loss.csv │ │ └── MET-2197__train_mean_advantage_outer_loss.csv │ ├── mgrl_outer_critic_gamma │ │ ├── MET-2152__train_mean_gamma.csv │ │ ├── MET-2159__train_mean_gamma.csv │ │ ├── MET-2164__train_mean_gamma.csv │ │ ├── MET-2170__train_mean_gamma.csv │ │ ├── MET-2176__train_mean_gamma.csv │ │ ├── MET-2182__train_mean_gamma.csv │ │ ├── MET-2186__train_mean_gamma.csv │ │ ├── MET-2189__train_mean_gamma.csv │ │ ├── MET-2193__train_mean_gamma.csv │ │ └── MET-2197__train_mean_gamma.csv │ ├── mgrl_outer_critic_return │ │ ├── MET-2152__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2159__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2164__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2170__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2176__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2182__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2186__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2189__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2193__eval_episode_reward_stochastic_policy.csv │ │ └── MET-2197__eval_episode_reward_stochastic_policy.csv │ └── mgrl_return │ │ ├── MET-2151__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2157__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2163__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2169__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2174__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2179__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2184__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2187__eval_episode_reward_stochastic_policy.csv │ │ ├── MET-2191__eval_episode_reward_stochastic_policy.csv │ │ └── MET-2194__eval_episode_reward_stochastic_policy.csv ├── env │ ├── __init__.py │ └── snake.py ├── networks │ ├── __init__.py │ ├── actor_critic.py │ ├── cnn.py │ ├── distribution.py │ └── snake.py └── training │ ├── __init__.py │ ├── config.py │ ├── evaluator.py │ ├── logger.py │ ├── setup_run.py │ ├── types.py │ └── utils.py └── snake_train.py /.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/.flake8 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/README.md -------------------------------------------------------------------------------- /commitlint.config.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/commitlint.config.js -------------------------------------------------------------------------------- /discounting_chain.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain.ipynb -------------------------------------------------------------------------------- /discounting_chain/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /discounting_chain/a2c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/a2c.py -------------------------------------------------------------------------------- /discounting_chain/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/base.py -------------------------------------------------------------------------------- /discounting_chain/bmg_a2c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/bmg_a2c.py -------------------------------------------------------------------------------- /discounting_chain/data/discounting_chain_appendix_histories_array.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/data/discounting_chain_appendix_histories_array.pickle -------------------------------------------------------------------------------- /discounting_chain/data/discounting_chain_histories_array.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/data/discounting_chain_histories_array.pickle -------------------------------------------------------------------------------- /discounting_chain/data_generation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /discounting_chain/data_generation/n_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/data_generation/n_step.py -------------------------------------------------------------------------------- /discounting_chain/data_generation/n_step_data_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/data_generation/n_step_data_generator.py -------------------------------------------------------------------------------- /discounting_chain/data_generation/n_step_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/data_generation/n_step_utils.py -------------------------------------------------------------------------------- /discounting_chain/envs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /discounting_chain/envs/env_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/envs/env_utils.py -------------------------------------------------------------------------------- /discounting_chain/envs/gymnax_dc_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/envs/gymnax_dc_wrapper.py -------------------------------------------------------------------------------- /discounting_chain/envs/gymnax_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/envs/gymnax_wrapper.py -------------------------------------------------------------------------------- /discounting_chain/list_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/list_logger.py -------------------------------------------------------------------------------- /discounting_chain/meta_a2c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/meta_a2c.py -------------------------------------------------------------------------------- /discounting_chain/nets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/nets.py -------------------------------------------------------------------------------- /discounting_chain/nets_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/nets_split.py -------------------------------------------------------------------------------- /discounting_chain/train_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain/train_utils.py -------------------------------------------------------------------------------- /discounting_chain_train.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/discounting_chain_train.ipynb -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/mypy.ini -------------------------------------------------------------------------------- /plots/discounting_chain/dc_chain_discount_factor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/discounting_chain/dc_chain_discount_factor.png -------------------------------------------------------------------------------- /plots/discounting_chain/dc_chain_discount_factor_appendix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/discounting_chain/dc_chain_discount_factor_appendix.png -------------------------------------------------------------------------------- /plots/discounting_chain/dc_chain_outer_loss_advantage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/discounting_chain/dc_chain_outer_loss_advantage.png -------------------------------------------------------------------------------- /plots/discounting_chain/dc_chain_outer_loss_advantage_appendix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/discounting_chain/dc_chain_outer_loss_advantage_appendix.png -------------------------------------------------------------------------------- /plots/discounting_chain/dc_chain_return.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/discounting_chain/dc_chain_return.png -------------------------------------------------------------------------------- /plots/discounting_chain/dc_chain_return_appendix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/discounting_chain/dc_chain_return_appendix.png -------------------------------------------------------------------------------- /plots/snake/snake_discount_factor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/snake/snake_discount_factor.png -------------------------------------------------------------------------------- /plots/snake/snake_discount_factor_appendix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/snake/snake_discount_factor_appendix.png -------------------------------------------------------------------------------- /plots/snake/snake_outer_loss_advantage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/snake/snake_outer_loss_advantage.png -------------------------------------------------------------------------------- /plots/snake/snake_outer_loss_advantage_appendix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/snake/snake_outer_loss_advantage_appendix.png -------------------------------------------------------------------------------- /plots/snake/snake_return.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/snake/snake_return.png -------------------------------------------------------------------------------- /plots/snake/snake_return_appendix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/plots/snake/snake_return_appendix.png -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/requirements-dev.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/requirements.txt -------------------------------------------------------------------------------- /snake.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake.ipynb -------------------------------------------------------------------------------- /snake/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /snake/agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/agent/__init__.py -------------------------------------------------------------------------------- /snake/agent/a2c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/agent/a2c.py -------------------------------------------------------------------------------- /snake/agent/actor_critic_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/agent/actor_critic_agent.py -------------------------------------------------------------------------------- /snake/agent/gae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/agent/gae.py -------------------------------------------------------------------------------- /snake/agent/meta_a2c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/agent/meta_a2c.py -------------------------------------------------------------------------------- /snake/configs/agent/a2c.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/configs/agent/a2c.yaml -------------------------------------------------------------------------------- /snake/configs/agent/bootstrap.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/configs/agent/bootstrap.yaml -------------------------------------------------------------------------------- /snake/configs/agent/mgrl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/configs/agent/mgrl.yaml -------------------------------------------------------------------------------- /snake/configs/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/configs/config.yaml -------------------------------------------------------------------------------- /snake/data/a2c_gamma/MET-2150__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_gamma/MET-2150__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/a2c_gamma/MET-2156__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_gamma/MET-2156__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/a2c_gamma/MET-2158__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_gamma/MET-2158__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/a2c_gamma/MET-2162__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_gamma/MET-2162__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/a2c_gamma/MET-2165__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_gamma/MET-2165__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/a2c_gamma/MET-2168__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_gamma/MET-2168__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/a2c_gamma/MET-2171__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_gamma/MET-2171__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/a2c_gamma/MET-2175__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_gamma/MET-2175__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/a2c_gamma/MET-2177__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_gamma/MET-2177__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/a2c_gamma/MET-2180__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_gamma/MET-2180__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/a2c_return/MET-2150__eval_episode_reward_determinist_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_return/MET-2150__eval_episode_reward_determinist_policy.csv -------------------------------------------------------------------------------- /snake/data/a2c_return/MET-2156__eval_episode_reward_determinist_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_return/MET-2156__eval_episode_reward_determinist_policy.csv -------------------------------------------------------------------------------- /snake/data/a2c_return/MET-2158__eval_episode_reward_determinist_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_return/MET-2158__eval_episode_reward_determinist_policy.csv -------------------------------------------------------------------------------- /snake/data/a2c_return/MET-2162__eval_episode_reward_determinist_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_return/MET-2162__eval_episode_reward_determinist_policy.csv -------------------------------------------------------------------------------- /snake/data/a2c_return/MET-2165__eval_episode_reward_determinist_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_return/MET-2165__eval_episode_reward_determinist_policy.csv -------------------------------------------------------------------------------- /snake/data/a2c_return/MET-2168__eval_episode_reward_determinist_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_return/MET-2168__eval_episode_reward_determinist_policy.csv -------------------------------------------------------------------------------- /snake/data/a2c_return/MET-2171__eval_episode_reward_determinist_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_return/MET-2171__eval_episode_reward_determinist_policy.csv -------------------------------------------------------------------------------- /snake/data/a2c_return/MET-2175__eval_episode_reward_determinist_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_return/MET-2175__eval_episode_reward_determinist_policy.csv -------------------------------------------------------------------------------- /snake/data/a2c_return/MET-2177__eval_episode_reward_determinist_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_return/MET-2177__eval_episode_reward_determinist_policy.csv -------------------------------------------------------------------------------- /snake/data/a2c_return/MET-2180__eval_episode_reward_determinist_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/a2c_return/MET-2180__eval_episode_reward_determinist_policy.csv -------------------------------------------------------------------------------- /snake/data/appendix/bias/bias_bootstrap_no_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/bias/bias_bootstrap_no_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/bias/bias_bootstrap_no_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/bias/bias_bootstrap_no_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/bias/bias_bootstrap_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/bias/bias_bootstrap_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/bias/bias_bootstrap_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/bias/bias_bootstrap_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/bias/bias_mgrl_no_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/bias/bias_mgrl_no_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/bias/bias_mgrl_no_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/bias/bias_mgrl_no_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/bias/bias_mgrl_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/bias/bias_mgrl_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/bias/bias_mgrl_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/bias/bias_mgrl_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/gamma/gamma_a2c_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/gamma/gamma_a2c_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/gamma/gamma_a2c_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/gamma/gamma_a2c_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/gamma/gamma_bootstrap_no_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/gamma/gamma_bootstrap_no_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/gamma/gamma_bootstrap_no_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/gamma/gamma_bootstrap_no_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/gamma/gamma_bootstrap_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/gamma/gamma_bootstrap_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/gamma/gamma_bootstrap_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/gamma/gamma_bootstrap_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/gamma/gamma_mgrl_no_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/gamma/gamma_mgrl_no_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/gamma/gamma_mgrl_no_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/gamma/gamma_mgrl_no_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/gamma/gamma_mgrl_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/gamma/gamma_mgrl_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/gamma/gamma_mgrl_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/gamma/gamma_mgrl_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/return/return_a2c_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/return/return_a2c_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/return/return_a2c_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/return/return_a2c_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/return/return_bootstrap_no_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/return/return_bootstrap_no_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/return/return_bootstrap_no_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/return/return_bootstrap_no_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/return/return_bootstrap_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/return/return_bootstrap_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/return/return_bootstrap_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/return/return_bootstrap_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/return/return_mgrl_no_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/return/return_mgrl_no_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/return/return_mgrl_no_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/return/return_mgrl_no_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/return/return_mgrl_outer_no_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/return/return_mgrl_outer_no_norm.csv -------------------------------------------------------------------------------- /snake/data/appendix/return/return_mgrl_outer_norm.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/appendix/return/return_mgrl_outer_norm.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_bias/MET-2154__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_bias/MET-2154__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_bias/MET-2160__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_bias/MET-2160__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_bias/MET-2166__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_bias/MET-2166__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_bias/MET-2172__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_bias/MET-2172__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_bias/MET-2178__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_bias/MET-2178__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_bias/MET-2183__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_bias/MET-2183__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_bias/MET-2188__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_bias/MET-2188__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_bias/MET-2192__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_bias/MET-2192__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_bias/MET-2196__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_bias/MET-2196__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_bias/MET-2199__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_bias/MET-2199__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_gamma/MET-2154__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_gamma/MET-2154__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_gamma/MET-2160__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_gamma/MET-2160__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_gamma/MET-2166__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_gamma/MET-2166__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_gamma/MET-2172__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_gamma/MET-2172__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_gamma/MET-2178__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_gamma/MET-2178__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_gamma/MET-2183__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_gamma/MET-2183__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_gamma/MET-2188__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_gamma/MET-2188__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_gamma/MET-2192__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_gamma/MET-2192__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_gamma/MET-2196__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_gamma/MET-2196__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_gamma/MET-2199__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_gamma/MET-2199__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_bias/MET-2153__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_bias/MET-2153__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_bias/MET-2161__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_bias/MET-2161__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_bias/MET-2167__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_bias/MET-2167__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_bias/MET-2173__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_bias/MET-2173__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_bias/MET-2181__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_bias/MET-2181__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_bias/MET-2185__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_bias/MET-2185__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_bias/MET-2190__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_bias/MET-2190__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_bias/MET-2195__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_bias/MET-2195__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_bias/MET-2198__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_bias/MET-2198__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_bias/MET-2200__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_bias/MET-2200__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_gamma/MET-2153__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_gamma/MET-2153__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_gamma/MET-2161__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_gamma/MET-2161__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_gamma/MET-2167__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_gamma/MET-2167__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_gamma/MET-2173__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_gamma/MET-2173__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_gamma/MET-2181__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_gamma/MET-2181__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_gamma/MET-2185__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_gamma/MET-2185__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_gamma/MET-2190__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_gamma/MET-2190__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_gamma/MET-2195__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_gamma/MET-2195__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_gamma/MET-2198__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_gamma/MET-2198__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_gamma/MET-2200__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_gamma/MET-2200__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_return/MET-2153__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_return/MET-2153__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_return/MET-2161__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_return/MET-2161__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_return/MET-2167__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_return/MET-2167__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_return/MET-2173__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_return/MET-2173__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_return/MET-2181__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_return/MET-2181__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_return/MET-2185__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_return/MET-2185__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_return/MET-2190__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_return/MET-2190__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_return/MET-2195__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_return/MET-2195__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_return/MET-2198__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_return/MET-2198__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_outer_critic_return/MET-2200__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_outer_critic_return/MET-2200__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_return/MET-2154__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_return/MET-2154__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_return/MET-2160__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_return/MET-2160__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_return/MET-2166__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_return/MET-2166__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_return/MET-2172__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_return/MET-2172__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_return/MET-2178__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_return/MET-2178__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_return/MET-2183__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_return/MET-2183__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_return/MET-2188__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_return/MET-2188__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_return/MET-2192__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_return/MET-2192__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_return/MET-2196__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_return/MET-2196__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/bootstrap_return/MET-2199__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/bootstrap_return/MET-2199__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_bias/MET-2151__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_bias/MET-2151__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_bias/MET-2157__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_bias/MET-2157__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_bias/MET-2163__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_bias/MET-2163__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_bias/MET-2169__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_bias/MET-2169__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_bias/MET-2174__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_bias/MET-2174__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_bias/MET-2179__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_bias/MET-2179__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_bias/MET-2184__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_bias/MET-2184__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_bias/MET-2187__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_bias/MET-2187__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_bias/MET-2191__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_bias/MET-2191__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_bias/MET-2194__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_bias/MET-2194__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_gamma/MET-2151__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_gamma/MET-2151__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_gamma/MET-2157__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_gamma/MET-2157__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_gamma/MET-2163__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_gamma/MET-2163__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_gamma/MET-2169__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_gamma/MET-2169__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_gamma/MET-2174__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_gamma/MET-2174__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_gamma/MET-2179__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_gamma/MET-2179__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_gamma/MET-2184__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_gamma/MET-2184__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_gamma/MET-2187__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_gamma/MET-2187__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_gamma/MET-2191__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_gamma/MET-2191__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_gamma/MET-2194__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_gamma/MET-2194__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_bias/MET-2152__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_bias/MET-2152__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_bias/MET-2159__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_bias/MET-2159__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_bias/MET-2164__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_bias/MET-2164__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_bias/MET-2170__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_bias/MET-2170__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_bias/MET-2176__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_bias/MET-2176__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_bias/MET-2182__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_bias/MET-2182__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_bias/MET-2186__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_bias/MET-2186__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_bias/MET-2189__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_bias/MET-2189__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_bias/MET-2193__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_bias/MET-2193__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_bias/MET-2197__train_mean_advantage_outer_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_bias/MET-2197__train_mean_advantage_outer_loss.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_gamma/MET-2152__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_gamma/MET-2152__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_gamma/MET-2159__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_gamma/MET-2159__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_gamma/MET-2164__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_gamma/MET-2164__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_gamma/MET-2170__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_gamma/MET-2170__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_gamma/MET-2176__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_gamma/MET-2176__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_gamma/MET-2182__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_gamma/MET-2182__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_gamma/MET-2186__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_gamma/MET-2186__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_gamma/MET-2189__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_gamma/MET-2189__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_gamma/MET-2193__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_gamma/MET-2193__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_gamma/MET-2197__train_mean_gamma.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_gamma/MET-2197__train_mean_gamma.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_return/MET-2152__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_return/MET-2152__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_return/MET-2159__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_return/MET-2159__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_return/MET-2164__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_return/MET-2164__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_return/MET-2170__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_return/MET-2170__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_return/MET-2176__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_return/MET-2176__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_return/MET-2182__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_return/MET-2182__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_return/MET-2186__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_return/MET-2186__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_return/MET-2189__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_return/MET-2189__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_return/MET-2193__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_return/MET-2193__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_outer_critic_return/MET-2197__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_outer_critic_return/MET-2197__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_return/MET-2151__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_return/MET-2151__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_return/MET-2157__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_return/MET-2157__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_return/MET-2163__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_return/MET-2163__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_return/MET-2169__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_return/MET-2169__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_return/MET-2174__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_return/MET-2174__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_return/MET-2179__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_return/MET-2179__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_return/MET-2184__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_return/MET-2184__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_return/MET-2187__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_return/MET-2187__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_return/MET-2191__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_return/MET-2191__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/data/mgrl_return/MET-2194__eval_episode_reward_stochastic_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/data/mgrl_return/MET-2194__eval_episode_reward_stochastic_policy.csv -------------------------------------------------------------------------------- /snake/env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/env/__init__.py -------------------------------------------------------------------------------- /snake/env/snake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/env/snake.py -------------------------------------------------------------------------------- /snake/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/networks/__init__.py -------------------------------------------------------------------------------- /snake/networks/actor_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/networks/actor_critic.py -------------------------------------------------------------------------------- /snake/networks/cnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/networks/cnn.py -------------------------------------------------------------------------------- /snake/networks/distribution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/networks/distribution.py -------------------------------------------------------------------------------- /snake/networks/snake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/networks/snake.py -------------------------------------------------------------------------------- /snake/training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /snake/training/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/training/config.py -------------------------------------------------------------------------------- /snake/training/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/training/evaluator.py -------------------------------------------------------------------------------- /snake/training/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/training/logger.py -------------------------------------------------------------------------------- /snake/training/setup_run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/training/setup_run.py -------------------------------------------------------------------------------- /snake/training/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/training/types.py -------------------------------------------------------------------------------- /snake/training/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake/training/utils.py -------------------------------------------------------------------------------- /snake_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instadeepai/outer-value-function-meta-rl/HEAD/snake_train.py --------------------------------------------------------------------------------