├── .copyright.hook
├── .github
    ├── Aircraft.gif
    ├── Breakout.gif
    ├── Half-Cheetah.gif
    ├── PARL-logo.png
    ├── abstractions.png
    ├── decorator.png
    └── workflows
    │   └── unittest.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── .teamcity
    ├── CHANGELOG.md
    ├── Dockerfile
    ├── action_build.sh
    ├── build.sh
    ├── requirements.txt
    ├── requirements_fluid.txt
    ├── requirements_torch.txt
    ├── update_readme_paddle_version.py
    ├── windows_build.bat
    ├── windows_requirements_fluid.txt
    ├── windows_requirements_paddle.txt
    └── windows_test.sh
├── CMakeLists.txt
├── LICENSE
├── MANIFEST.in
├── README.cn.md
├── README.md
├── benchmark
    ├── fluid
    │   ├── A2C
    │   │   ├── README.md
    │   │   ├── a2c_config.py
    │   │   ├── actor.py
    │   │   ├── atari_agent.py
    │   │   ├── atari_model.py
    │   │   ├── learning_curve.png
    │   │   ├── result.png
    │   │   └── train.py
    │   ├── DDPG
    │   │   ├── .benchmark
    │   │   │   ├── DDPG_HalfCheetah-v2.png
    │   │   │   ├── DDPG_Hopper-v2.png
    │   │   │   └── DDPG_Humanoid-v2.png
    │   │   ├── README.md
    │   │   ├── mujoco_agent.py
    │   │   ├── mujoco_model.py
    │   │   └── train.py
    │   ├── DQN
    │   │   ├── README.md
    │   │   ├── cartpole.jpg
    │   │   ├── cartpole_agent.py
    │   │   ├── cartpole_model.py
    │   │   ├── replay_memory.py
    │   │   ├── train.py
    │   │   ├── train_on_xpu.md
    │   │   └── train_with_xpu.py
    │   ├── DQN_variant
    │   │   ├── .benchmark
    │   │   │   ├── merge.png
    │   │   │   └── table.png
    │   │   ├── README.md
    │   │   ├── atari.py
    │   │   ├── atari_agent.py
    │   │   ├── atari_model.py
    │   │   ├── atari_wrapper.py
    │   │   ├── replay_memory.py
    │   │   ├── rom_files
    │   │   │   ├── battle_zone.bin
    │   │   │   ├── breakout.bin
    │   │   │   └── pong.bin
    │   │   ├── train.py
    │   │   └── utils.py
    │   ├── ES
    │   │   ├── README.md
    │   │   ├── actor.py
    │   │   ├── es.py
    │   │   ├── es_config.py
    │   │   ├── learning_curve.png
    │   │   ├── mujoco_agent.py
    │   │   ├── mujoco_model.py
    │   │   ├── noise.py
    │   │   ├── obs_filter.py
    │   │   ├── optimizers.py
    │   │   ├── train.py
    │   │   └── utils.py
    │   ├── GA3C
    │   │   ├── .benchmark
    │   │   │   ├── GA3C_BeamRider.jpg
    │   │   │   ├── GA3C_Breakout.jpg
    │   │   │   ├── GA3C_Pong.jpg
    │   │   │   ├── GA3C_Qbert.jpg
    │   │   │   └── GA3C_SpaceInvaders.jpg
    │   │   ├── README.md
    │   │   ├── actor.py
    │   │   ├── atari_agent.py
    │   │   ├── atari_model.py
    │   │   ├── ga3c_config.py
    │   │   └── train.py
    │   ├── IMPALA
    │   │   ├── .benchmark
    │   │   │   ├── IMPALA_BeamRider.jpg
    │   │   │   ├── IMPALA_Breakout.jpg
    │   │   │   ├── IMPALA_Pong.jpg
    │   │   │   ├── IMPALA_Qbert.jpg
    │   │   │   └── IMPALA_SpaceInvaders.jpg
    │   │   ├── README.md
    │   │   ├── actor.py
    │   │   ├── atari_agent.py
    │   │   ├── atari_model.py
    │   │   ├── impala_config.py
    │   │   └── train.py
    │   ├── MADDPG
    │   │   ├── .benchmark
    │   │   │   ├── MADDPG_simple.gif
    │   │   │   ├── MADDPG_simple.png
    │   │   │   ├── MADDPG_simple_adversary.gif
    │   │   │   ├── MADDPG_simple_adversary.png
    │   │   │   ├── MADDPG_simple_crypto.png
    │   │   │   ├── MADDPG_simple_push.gif
    │   │   │   ├── MADDPG_simple_push.png
    │   │   │   ├── MADDPG_simple_reference.gif
    │   │   │   ├── MADDPG_simple_reference.png
    │   │   │   ├── MADDPG_simple_speaker_listener.gif
    │   │   │   ├── MADDPG_simple_speaker_listener.png
    │   │   │   ├── MADDPG_simple_spread.gif
    │   │   │   ├── MADDPG_simple_spread.png
    │   │   │   ├── MADDPG_simple_tag.gif
    │   │   │   ├── MADDPG_simple_tag.png
    │   │   │   ├── MADDPG_simple_world_comm.gif
    │   │   │   └── MADDPG_simple_world_comm.png
    │   │   ├── README.md
    │   │   ├── simple_agent.py
    │   │   ├── simple_model.py
    │   │   └── train.py
    │   ├── PPO
    │   │   ├── .benchmark
    │   │   │   ├── PPO_HalfCheetah-v2.png
    │   │   │   └── PPO_Hopper-v2.png
    │   │   ├── README.md
    │   │   ├── mujoco_agent.py
    │   │   ├── mujoco_model.py
    │   │   ├── scaler.py
    │   │   └── train.py
    │   ├── Prioritized_DQN
    │   │   ├── README.md
    │   │   ├── atari.py
    │   │   ├── atari_agent.py
    │   │   ├── atari_model.py
    │   │   ├── atari_wrapper.py
    │   │   ├── per_alg.py
    │   │   ├── proportional_per.py
    │   │   ├── result.png
    │   │   ├── rom_files
    │   │   ├── train.py
    │   │   └── utils.py
    │   ├── QMIX
    │   │   ├── README.md
    │   │   ├── env_wrapper.py
    │   │   ├── images
    │   │   │   └── paddle-qmix-result.png
    │   │   ├── qmix_agent.py
    │   │   ├── qmix_config.py
    │   │   ├── qmixer_model.py
    │   │   ├── replay_buffer.py
    │   │   ├── rnn_model.py
    │   │   ├── train.py
    │   │   └── utils.py
    │   ├── QuickStart
    │   │   ├── README.md
    │   │   ├── cartpole_agent.py
    │   │   ├── cartpole_model.py
    │   │   ├── performance.gif
    │   │   └── train.py
    │   ├── SAC
    │   │   ├── .benchmark
    │   │   │   └── merge.png
    │   │   ├── README.md
    │   │   ├── mujoco_agent.py
    │   │   ├── mujoco_model.py
    │   │   └── train.py
    │   ├── TD3
    │   │   ├── .benchmark
    │   │   │   └── merge.png
    │   │   ├── README.md
    │   │   ├── mujoco_agent.py
    │   │   ├── mujoco_model.py
    │   │   └── train.py
    │   └── offline-Q-learning
    │   │   ├── README.md
    │   │   ├── atari.py
    │   │   ├── atari_agent.py
    │   │   ├── atari_model.py
    │   │   ├── atari_wrapper.py
    │   │   ├── dqn.py
    │   │   ├── parallel_run.py
    │   │   ├── replay_memory.py
    │   │   ├── rom_files
    │   │   └── utils.py
    └── torch
    │   ├── AlphaZero
    │       ├── .pic
    │       │   ├── good_moves.png
    │       │   └── perfect_moves.png
    │       ├── Arena.py
    │       ├── Coach.py
    │       ├── MCTS.py
    │       ├── README.md
    │       ├── actor.py
    │       ├── alphazero_agent.py
    │       ├── connect4_game.py
    │       ├── connect4_model.py
    │       ├── gen_submission.py
    │       ├── main.py
    │       ├── submission_template.py
    │       └── utils.py
    │   ├── DT
    │       ├── README.md
    │       ├── agent.py
    │       ├── data
    │       │   └── download_d4rl_datasets.py
    │       ├── data_loader.py
    │       ├── evaluate_episodes.py
    │       ├── model.py
    │       ├── train.py
    │       └── trajectory_gpt2.py
    │   ├── ES
    │       ├── README.md
    │       ├── actor.py
    │       ├── es.py
    │       ├── es_config.py
    │       ├── mujoco_agent.py
    │       ├── mujoco_model.py
    │       ├── noise.py
    │       ├── obs_filter.py
    │       ├── optimizers.py
    │       ├── train.py
    │       └── utils.py
    │   ├── NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge
    │       ├── README.md
    │       ├── track1
    │       │   ├── README.md
    │       │   ├── evaluate.py
    │       │   ├── powernet_model.py
    │       │   ├── rl_agent.py
    │       │   └── utils.py
    │       └── track2
    │       │   ├── README.md
    │       │   ├── evaluate.py
    │       │   ├── powernet_model.py
    │       │   ├── rl_agent.py
    │       │   └── utils.py
    │   ├── QuickStart
    │       ├── README.md
    │       ├── cartpole_agent.py
    │       ├── cartpole_model.py
    │       └── train.py
    │   ├── a2c
    │       ├── .result
    │       │   ├── result_a2c_torch0.png
    │       │   └── result_a2c_torch1.png
    │       ├── README.md
    │       ├── a2c_config.py
    │       ├── actor.py
    │       ├── atari_agent.py
    │       ├── atari_model.py
    │       └── train.py
    │   ├── coma
    │       ├── .benchmark
    │       │   └── 3m_result.png
    │       ├── README.md
    │       ├── coma_config.py
    │       ├── sc2_agent.py
    │       ├── sc2_model.py
    │       ├── starcraft2
    │       │   ├── Dockerfile
    │       │   ├── build_docker.sh
    │       │   └── install_sc2.sh
    │       └── train.py
    │   ├── cql
    │       ├── README.md
    │       ├── mujoco_agent.py
    │       ├── mujoco_model.py
    │       └── train.py
    │   ├── ddpg
    │       ├── README.md
    │       ├── mujoco_agent.py
    │       ├── mujoco_model.py
    │       └── train.py
    │   ├── dqn
    │       ├── .benchmark
    │       │   └── dqn.png
    │       ├── README.md
    │       ├── agent.py
    │       ├── model.py
    │       ├── replay_memory.py
    │       └── train.py
    │   ├── iql
    │       ├── README.md
    │       ├── mujoco_agent.py
    │       ├── mujoco_model.py
    │       ├── replay_buffer.py
    │       └── train.py
    │   ├── maddpg
    │       ├── README.md
    │       ├── simple_agent.py
    │       ├── simple_model.py
    │       └── train.py
    │   ├── maml++
    │       ├── .benchmark
    │       │   └── loss.png
    │       ├── README.md
    │       ├── config.py
    │       ├── data.py
    │       ├── maml_agent.py
    │       ├── maml_algorithm.py
    │       ├── maml_model.py
    │       └── train.py
    │   ├── mappo
    │       ├── README.md
    │       ├── env_wrappers.py
    │       ├── mappo_buffer.py
    │       ├── simple_agent.py
    │       ├── simple_model.py
    │       └── train.py
    │   ├── oac
    │       ├── README.md
    │       ├── mujoco_agent.py
    │       ├── mujoco_model.py
    │       └── train.py
    │   ├── ppo
    │       ├── README.md
    │       ├── agent.py
    │       ├── atari_config.py
    │       ├── atari_model.py
    │       ├── env_utils.py
    │       ├── mujoco_config.py
    │       ├── mujoco_model.py
    │       ├── storage.py
    │       └── train.py
    │   ├── qmix
    │       ├── README.md
    │       ├── env_wrapper.py
    │       ├── images
    │       │   └── torch-qmix-result.png
    │       ├── qmix_agent.py
    │       ├── qmix_config.py
    │       ├── qmixer_model.py
    │       ├── replay_buffer.py
    │       ├── rnn_model.py
    │       ├── train.py
    │       └── utils.py
    │   ├── sac
    │       ├── README.md
    │       ├── mujoco_agent.py
    │       ├── mujoco_model.py
    │       └── train.py
    │   └── td3
    │       ├── README.md
    │       ├── mujoco_agent.py
    │       ├── mujoco_model.py
    │       └── train.py
├── docs
    ├── EvoKit
    │   ├── minimal_example.rst
    │   ├── online_example.rst
    │   └── overview.rst
    ├── Makefile
    ├── _static
    │   ├── build_toc_group.js
    │   └── pygments.css
    ├── api_docs
    │   ├── index.rst
    │   └── utils.rst
    ├── apis
    │   ├── agent.rst
    │   ├── algorithm.rst
    │   ├── connect.rst
    │   ├── model.rst
    │   └── remote_class.rst
    ├── basic_structure
    │   ├── agent.rst
    │   ├── algorithm.rst
    │   ├── model.rst
    │   └── overview.rst
    ├── conf.py
    ├── features.rst
    ├── images
    │   ├── PARL-logo-1.png
    │   ├── PARL-logo-2.png
    │   ├── bar.png
    │   └── quickstart.png
    ├── implementations
    │   ├── a2c.rst
    │   ├── a3c.rst
    │   ├── ddpg.rst
    │   ├── ddqn.rst
    │   ├── dqn.rst
    │   ├── impala.rst
    │   ├── maddpg.rst
    │   ├── new_alg.rst
    │   ├── oac.rst
    │   ├── pg.rst
    │   ├── ppo.rst
    │   ├── qmix.rst
    │   ├── sac.rst
    │   └── td3.rst
    ├── index.rst
    ├── installation.rst
    ├── installation_guide.md
    ├── installation_guide_cn.md
    ├── locale
    │   └── zh_CN
    │   │   └── LC_MESSAGES
    │   │       ├── EvoKit.po
    │   │       ├── api_docs.po
    │   │       ├── apis.po
    │   │       ├── basic_structure.po
    │   │       ├── features.po
    │   │       ├── implementations.po
    │   │       ├── index.po
    │   │       ├── installation.po
    │   │       ├── overview.po
    │   │       ├── parallel_training.po
    │   │       └── tutorial.po
    ├── overview
    │   ├── abstractions.rst
    │   ├── features.rst
    │   └── parallelization.rst
    ├── parallel_training
    │   ├── comparison.png
    │   ├── debug.rst
    │   ├── elapsed_time.jpg
    │   ├── file_distribution.rst
    │   ├── gpu_cluster.rst
    │   ├── overview.rst
    │   ├── poster.png
    │   ├── recommended_practice1.rst
    │   ├── recommended_practice2.rst
    │   ├── serialization.rst
    │   └── setup.rst
    ├── questions
    │   └── distributed_training.rst
    ├── requirements.txt
    ├── test.sh
    ├── tutorial
    │   ├── add_histogram.jpg
    │   ├── add_scalar.jpg
    │   ├── getting_started.rst
    │   ├── maa.rst
    │   ├── output_as_csv.rst
    │   ├── save_param.rst
    │   └── tensorboard.rst
    └── zh_CN
    │   ├── Overview.md
    │   ├── tutorial
    │       ├── csv_logger.md
    │       ├── module.md
    │       ├── param.md
    │       ├── quick_start.md
    │       └── summary.md
    │   └── xparl
    │       ├── .images
    │           ├── dataset1.png
    │           ├── dataset2.png
    │           ├── dataset3.png
    │           ├── dataset4.png
    │           └── log_server.png
    │       ├── debug.md
    │       ├── distribute_files.md
    │       ├── example1.md
    │       ├── example2.md
    │       ├── introduction.md
    │       ├── serialize.md
    │       └── tutorial.md
├── evo_kit
    ├── CMakeLists.txt
    ├── DeepES.gif
    ├── README.md
    ├── benchmark
    │   └── cartpole.h
    ├── cmake
    │   └── Torch
    │   │   └── EvoKitConfig.cmake
    ├── core
    │   ├── include
    │   │   └── evo_kit
    │   │   │   ├── adam_optimizer.h
    │   │   │   ├── cached_gaussian_sampling.h
    │   │   │   ├── gaussian_sampling.h
    │   │   │   ├── optimizer.h
    │   │   │   ├── optimizer_factory.h
    │   │   │   ├── sampling_factory.h
    │   │   │   ├── sampling_method.h
    │   │   │   ├── sgd_optimizer.h
    │   │   │   └── utils.h
    │   ├── proto
    │   │   └── evo_kit
    │   │   │   └── evo_kit.proto
    │   └── src
    │   │   ├── adam_optimizer.cc
    │   │   ├── cached_gaussian_sampling.cc
    │   │   ├── gaussian_sampling.cc
    │   │   ├── optimizer_factory.cc
    │   │   ├── sampling_factory.cc
    │   │   ├── sgd_optimizer.cc
    │   │   └── utils.cc
    ├── demo
    │   ├── cartpole_config.prototxt
    │   ├── paddle
    │   │   ├── cartpole_async_solver.cc
    │   │   ├── cartpole_init_model.zip
    │   │   ├── cartpole_solver_parallel.cc
    │   │   └── gen_cartpole_init_model.py
    │   └── torch
    │   │   ├── CMakeLists.txt
    │   │   ├── cartpole_solver_parallel.cc
    │   │   └── model.h
    ├── paddle
    │   ├── include
    │   │   └── evo_kit
    │   │   │   ├── async_es_agent.h
    │   │   │   └── es_agent.h
    │   └── src
    │   │   ├── async_es_agent.cc
    │   │   └── es_agent.cc
    ├── scripts
    │   ├── build.sh
    │   └── lib_install.sh
    ├── test
    │   ├── CMakeLists.txt
    │   ├── include
    │   │   └── torch_demo_model.h
    │   ├── prototxt
    │   │   ├── torch_sin_cached_config.prototxt
    │   │   └── torch_sin_config.prototxt
    │   ├── run_test.sh
    │   ├── src
    │   │   ├── optimizers_test.cc
    │   │   ├── sampling_test.cc
    │   │   ├── torch_agent_test.cc
    │   │   └── utils_test.cc
    │   └── unit_test.cc
    └── torch
    │   └── include
    │       └── evo_kit
    │           └── es_agent.h
├── examples
    ├── A2C
    │   ├── .result
    │   │   ├── result_a2c_paddle0.png
    │   │   └── result_a2c_paddle1.png
    │   ├── README.md
    │   ├── a2c_config.py
    │   ├── actor.py
    │   ├── atari_agent.py
    │   ├── atari_model.py
    │   ├── requirements.txt
    │   └── train.py
    ├── AlphaZero
    │   ├── .pic
    │   │   ├── good_moves_rate.png
    │   │   └── perfect_moves_rate.png
    │   ├── Arena.py
    │   ├── Coach.py
    │   ├── MCTS.py
    │   ├── README.md
    │   ├── actor.py
    │   ├── alphazero_agent.py
    │   ├── connect4_aiplayer.py
    │   ├── connect4_game.py
    │   ├── connect4_model.py
    │   ├── main.py
    │   ├── requirements.txt
    │   └── utils.py
    ├── Baselines
    │   ├── GridDispatch_competition
    │   │   ├── README.md
    │   │   ├── paddle
    │   │   │   ├── README.md
    │   │   │   ├── env_wrapper.py
    │   │   │   ├── grid_agent.py
    │   │   │   ├── grid_model.py
    │   │   │   └── train.py
    │   │   └── torch
    │   │   │   ├── README.md
    │   │   │   ├── env_wrapper.py
    │   │   │   ├── grid_agent.py
    │   │   │   ├── grid_model.py
    │   │   │   └── train.py
    │   └── Halite_competition
    │   │   ├── paddle
    │   │       ├── README.md
    │   │       ├── config.py
    │   │       ├── encode_model.py
    │   │       ├── model
    │   │       │   └── latest_ship_model.pth
    │   │       ├── requirements.txt
    │   │       ├── rl_trainer
    │   │       │   ├── agent.py
    │   │       │   ├── algorithm.py
    │   │       │   ├── controller.py
    │   │       │   ├── model.py
    │   │       │   ├── obs_parser.py
    │   │       │   ├── policy.py
    │   │       │   ├── replay_memory.py
    │   │       │   └── utils.py
    │   │       ├── submission.py
    │   │       ├── test.ipynb
    │   │       ├── test.py
    │   │       └── train.py
    │   │   └── torch
    │   │       ├── README.md
    │   │       ├── config.py
    │   │       ├── encode_model.py
    │   │       ├── model
    │   │           └── latest_ship_model.pth
    │   │       ├── requirements.txt
    │   │       ├── rl_trainer
    │   │           ├── agent.py
    │   │           ├── algorithm.py
    │   │           ├── controller.py
    │   │           ├── model.py
    │   │           ├── obs_parser.py
    │   │           ├── policy.py
    │   │           ├── replay_memory.py
    │   │           └── utils.py
    │   │       ├── submission.py
    │   │       ├── test.ipynb
    │   │       ├── test.py
    │   │       └── train.py
    ├── CARLA_SAC
    │   ├── .benchmark
    │   │   ├── Lane_bend.gif
    │   │   └── carla_sac.png
    │   ├── README.md
    │   ├── carla_agent.py
    │   ├── carla_model.py
    │   ├── env_config.py
    │   ├── env_utils.py
    │   ├── evaluate.py
    │   ├── model.ckpt
    │   └── train.py
    ├── CQL
    │   ├── README.md
    │   ├── mujoco_agent.py
    │   ├── mujoco_model.py
    │   ├── requirements.txt
    │   └── train.py
    ├── DDPG
    │   ├── README.md
    │   ├── mujoco_agent.py
    │   ├── mujoco_model.py
    │   ├── requirements.txt
    │   └── train.py
    ├── DQN
    │   ├── README.md
    │   ├── cartpole.jpg
    │   ├── cartpole_agent.py
    │   ├── cartpole_model.py
    │   ├── requirements.txt
    │   └── train.py
    ├── DQN_variant
    │   ├── .benchmark
    │   │   └── Dueling DQN.png
    │   ├── README.md
    │   ├── atari_agent.py
    │   ├── atari_model.py
    │   ├── replay_memory.py
    │   ├── requirements.txt
    │   └── train.py
    ├── ES
    │   ├── README.md
    │   ├── actor.py
    │   ├── es.py
    │   ├── es_config.py
    │   ├── mujoco_agent.py
    │   ├── mujoco_model.py
    │   ├── noise.py
    │   ├── obs_filter.py
    │   ├── optimizers.py
    │   ├── requirements.txt
    │   ├── train.py
    │   └── utils.py
    ├── IMPALA
    │   ├── README.md
    │   ├── actor.py
    │   ├── atari_agent.py
    │   ├── atari_model.py
    │   ├── impala_config.py
    │   ├── requirements.txt
    │   └── train.py
    ├── MADDPG
    │   ├── README.md
    │   ├── requirements.txt
    │   ├── simple_agent.py
    │   ├── simple_model.py
    │   └── train.py
    ├── NeurIPS2018-AI-for-Prosthetics-Challenge
    │   ├── README.md
    │   ├── args.py
    │   ├── env_wrapper.py
    │   ├── final_submit
    │   │   ├── env_wrapper.py
    │   │   ├── mlp_model.py
    │   │   ├── pelvisBasedObs_scaler.npz
    │   │   ├── submit_model.py
    │   │   └── test.py
    │   ├── image
    │   │   ├── competition.png
    │   │   ├── curriculum-learning.png
    │   │   ├── demo.gif
    │   │   ├── fastest.png
    │   │   ├── last course.png
    │   │   └── velocity_distribution.png
    │   ├── multi_head_ddpg.py
    │   ├── opensim_agent.py
    │   ├── opensim_model.py
    │   ├── pelvisBasedObs_scaler.npz
    │   ├── replay_memory.py
    │   ├── simulator_client.py
    │   ├── simulator_pb2.py
    │   ├── simulator_pb2_grpc.py
    │   ├── simulator_server.py
    │   ├── test.py
    │   └── utils.py
    ├── NeurIPS2019-Learn-to-Move-Challenge
    │   ├── README.md
    │   ├── actor.py
    │   ├── env_wrapper.py
    │   ├── evaluate.py
    │   ├── evaluate_args.py
    │   ├── final_submit
    │   │   ├── env_wrapper.py
    │   │   ├── mlp_model.py
    │   │   ├── official_obs_scaler.npz
    │   │   ├── submit_model.py
    │   │   └── test.py
    │   ├── image
    │   │   └── performance.gif
    │   ├── official_obs_scaler.npz
    │   ├── opensim_agent.py
    │   ├── opensim_model.py
    │   ├── replay_memory.py
    │   ├── scripts
    │   │   ├── eval_difficulty1.sh
    │   │   ├── eval_difficulty2.sh
    │   │   ├── eval_difficulty3.sh
    │   │   ├── eval_difficulty3_first_target.sh
    │   │   ├── train_difficulty1.sh
    │   │   ├── train_difficulty2.sh
    │   │   ├── train_difficulty3.sh
    │   │   └── train_difficulty3_first_target.sh
    │   ├── train.py
    │   └── train_args.py
    ├── NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge
    │   ├── README.md
    │   ├── images
    │   │   └── l2rpn.jpeg
    │   ├── track1
    │   │   ├── README.md
    │   │   ├── agent.py
    │   │   ├── es.py
    │   │   ├── es_agent.py
    │   │   ├── evaluate.py
    │   │   ├── powernet_model.py
    │   │   └── utils.py
    │   └── track2
    │   │   ├── README.md
    │   │   ├── agent.py
    │   │   ├── es.py
    │   │   ├── es_agent.py
    │   │   ├── evaluate.py
    │   │   ├── powernet_model.py
    │   │   └── utils.py
    ├── OAC
    │   ├── README.md
    │   ├── mujoco_agent.py
    │   ├── mujoco_model.py
    │   ├── requirements.txt
    │   └── train.py
    ├── PPO
    │   ├── README.md
    │   ├── agent.py
    │   ├── atari_config.py
    │   ├── atari_model.py
    │   ├── env_utils.py
    │   ├── mujoco_config.py
    │   ├── mujoco_model.py
    │   ├── requirements_atari.txt
    │   ├── requirements_mujoco.txt
    │   ├── storage.py
    │   └── train.py
    ├── QMIX
    │   ├── README.md
    │   ├── env_wrapper.py
    │   ├── images
    │   │   └── paddle2.0_qmix_result.png
    │   ├── qmix_agent.py
    │   ├── qmix_config.py
    │   ├── qmixer_model.py
    │   ├── replay_buffer.py
    │   ├── requirements.txt
    │   ├── rnn_model.py
    │   ├── train.py
    │   └── utils.py
    ├── QuickStart
    │   ├── README.md
    │   ├── cartpole_agent.py
    │   ├── cartpole_model.py
    │   ├── performance.gif
    │   ├── requirements.txt
    │   └── train.py
    ├── SAC
    │   ├── README.md
    │   ├── mujoco_agent.py
    │   ├── mujoco_model.py
    │   ├── requirements.txt
    │   └── train.py
    ├── TD3
    │   ├── README.md
    │   ├── mujoco_agent.py
    │   ├── mujoco_model.py
    │   ├── requirements.txt
    │   └── train.py
    ├── others
    │   └── deepes.py
    └── tutorials
    │   ├── README.md
    │   ├── homework
    │       ├── lesson2
    │       │   ├── q_learning_frozenlake
    │       │   │   ├── agent.py
    │       │   │   ├── gridworld.py
    │       │   │   └── train.py
    │       │   └── sarsa_frozenlake
    │       │   │   ├── agent.py
    │       │   │   ├── gridworld.py
    │       │   │   └── train.py
    │       ├── lesson3
    │       │   └── dqn_mountaincar
    │       │   │   ├── agent.py
    │       │   │   ├── model.py
    │       │   │   ├── replay_memory.py
    │       │   │   └── train.py
    │       ├── lesson4
    │       │   └── policy_gradient_pong
    │       │   │   ├── agent.py
    │       │   │   ├── model.py
    │       │   │   └── train.py
    │       └── lesson5
    │       │   └── ddpg_quadrotor
    │       │       ├── quadrotor_agent.py
    │       │       ├── quadrotor_model.py
    │       │       └── train.py
    │   ├── lesson1
    │       └── gridworld.py
    │   ├── lesson2
    │       ├── q_learning
    │       │   ├── agent.py
    │       │   ├── gridworld.py
    │       │   └── train.py
    │       └── sarsa
    │       │   ├── agent.py
    │       │   ├── gridworld.py
    │       │   └── train.py
    │   ├── lesson3
    │       └── dqn
    │       │   ├── agent.py
    │       │   ├── algorithm.py
    │       │   ├── model.py
    │       │   ├── replay_memory.py
    │       │   └── train.py
    │   ├── lesson4
    │       └── policy_gradient
    │       │   ├── agent.py
    │       │   ├── algorithm.py
    │       │   ├── model.py
    │       │   └── train.py
    │   ├── lesson5
    │       └── ddpg
    │       │   ├── agent.py
    │       │   ├── algorithm.py
    │       │   ├── env.py
    │       │   ├── model.py
    │       │   ├── replay_memory.py
    │       │   └── train.py
    │   ├── parl2_dygraph
    │       ├── README.md
    │       ├── lesson3
    │       │   ├── dqn
    │       │   │   ├── agent.py
    │       │   │   ├── algorithm.py
    │       │   │   ├── model.py
    │       │   │   ├── replay_memory.py
    │       │   │   └── train.py
    │       │   └── homework
    │       │   │   └── dqn_mountaincar
    │       │   │       ├── agent.py
    │       │   │       ├── model.py
    │       │   │       ├── replay_memory.py
    │       │   │       └── train.py
    │       ├── lesson4
    │       │   ├── homework
    │       │   │   └── policy_gradient_pong
    │       │   │   │   ├── agent.py
    │       │   │   │   ├── model.py
    │       │   │   │   └── train.py
    │       │   └── policy_gradient
    │       │   │   ├── agent.py
    │       │   │   ├── algorithm.py
    │       │   │   ├── model.py
    │       │   │   └── train.py
    │       ├── lesson5
    │       │   ├── ddpg
    │       │   │   ├── agent.py
    │       │   │   ├── algorithm.py
    │       │   │   ├── env.py
    │       │   │   ├── model.py
    │       │   │   ├── replay_memory.py
    │       │   │   └── train.py
    │       │   └── homework
    │       │   │   └── ddpg_quadrotor
    │       │   │       ├── quadrotor_agent.py
    │       │   │       ├── quadrotor_model.py
    │       │   │       └── train.py
    │       └── requirements.txt
    │   └── requirements.txt
├── papers
    ├── AAAI_2020.md
    ├── DeepMind2020.md
    ├── ICLR_2020.md
    ├── ICLR_2021.md
    ├── NeurIPS 2019 RL papers.numbers
    ├── NeurIPS_2020.md
    └── archive.md
├── parl
    ├── __init__.py
    ├── algorithms
    │   ├── __init__.py
    │   ├── fluid
    │   │   ├── __init__.py
    │   │   ├── a3c.py
    │   │   ├── ddpg.py
    │   │   ├── ddqn.py
    │   │   ├── dqn.py
    │   │   ├── impala
    │   │   │   ├── __init__.py
    │   │   │   ├── impala.py
    │   │   │   ├── tests
    │   │   │   │   └── vtrace_test_fluid.py
    │   │   │   └── vtrace.py
    │   │   ├── maddpg.py
    │   │   ├── policy_gradient.py
    │   │   ├── ppo.py
    │   │   ├── qmix.py
    │   │   ├── sac.py
    │   │   ├── td3.py
    │   │   └── tests
    │   │   │   └── algs_test_fluid.py
    │   ├── paddle
    │   │   ├── __init__.py
    │   │   ├── a2c.py
    │   │   ├── cql.py
    │   │   ├── ddpg.py
    │   │   ├── ddqn.py
    │   │   ├── dqn.py
    │   │   ├── impala
    │   │   │   ├── __init__.py
    │   │   │   ├── impala.py
    │   │   │   ├── tests
    │   │   │   │   └── vtrace_test_paddle.py
    │   │   │   └── vtrace.py
    │   │   ├── maddpg.py
    │   │   ├── oac.py
    │   │   ├── policy_gradient.py
    │   │   ├── ppo.py
    │   │   ├── qmix.py
    │   │   ├── sac.py
    │   │   └── td3.py
    │   └── torch
    │   │   ├── __init__.py
    │   │   ├── a2c.py
    │   │   ├── coma.py
    │   │   ├── cql.py
    │   │   ├── ddpg.py
    │   │   ├── ddqn.py
    │   │   ├── dqn.py
    │   │   ├── dt.py
    │   │   ├── iql.py
    │   │   ├── maddpg.py
    │   │   ├── mappo.py
    │   │   ├── oac.py
    │   │   ├── policy_gradient.py
    │   │   ├── ppo.py
    │   │   ├── qmix.py
    │   │   ├── sac.py
    │   │   └── td3.py
    ├── core
    │   ├── __init__.py
    │   ├── agent_base.py
    │   ├── algorithm_base.py
    │   ├── fluid
    │   │   ├── __init__.py
    │   │   ├── agent.py
    │   │   ├── algorithm.py
    │   │   ├── layers
    │   │   │   ├── __init__.py
    │   │   │   ├── attr_holder.py
    │   │   │   ├── layer_wrappers.py
    │   │   │   └── tests
    │   │   │   │   ├── param_name_test_fluid.py
    │   │   │   │   └── param_sharing_test_fluid.py
    │   │   ├── model.py
    │   │   ├── model_helper.py
    │   │   ├── plutils
    │   │   │   ├── __init__.py
    │   │   │   ├── common.py
    │   │   │   └── compiler.py
    │   │   ├── policy_distribution.py
    │   │   └── tests
    │   │   │   ├── agent_base_test_fluid.py
    │   │   │   ├── agent_model_ids_test_fluid.py
    │   │   │   ├── algorithm_base_test_fluid.py
    │   │   │   ├── fluid_gpu_actor_test_fluid.py
    │   │   │   ├── gru_cell_test_fluid.py
    │   │   │   ├── model_base_test_fluid.py
    │   │   │   ├── model_helper_test_fluid.py
    │   │   │   └── policy_distribution_test_fluid.py
    │   ├── model_base.py
    │   ├── paddle
    │   │   ├── __init__.py
    │   │   ├── agent.py
    │   │   ├── algorithm.py
    │   │   ├── model.py
    │   │   ├── policy_distribution.py
    │   │   └── tests
    │   │   │   ├── agent_base_actor_critic_test_paddle.py
    │   │   │   ├── agent_base_test_paddle.py
    │   │   │   ├── model_base_actor_critic_test_paddle.py
    │   │   │   ├── model_base_test_paddle.py
    │   │   │   └── policy_distribution_test_paddle.py
    │   ├── tests
    │   │   ├── agent_base_test.py
    │   │   ├── algorithm_base_test.py
    │   │   └── model_base_test.py
    │   └── torch
    │   │   ├── __init__.py
    │   │   ├── agent.py
    │   │   ├── algorithm.py
    │   │   ├── model.py
    │   │   ├── policy_distribution.py
    │   │   └── tests
    │   │       ├── agent_base_actor_critic_test_torch.py
    │   │       ├── agent_base_test_torch.py
    │   │       ├── model_base_test_torch.py
    │   │       ├── model_based_actor_critic_test_torch.py
    │   │       └── policy_distribution_test_torch.py
    ├── env
    │   ├── __init__.py
    │   ├── atari_wrappers.py
    │   ├── compat_wrappers.py
    │   ├── continuous_wrappers.py
    │   ├── mujoco_wrappers.py
    │   ├── multiagent_env.py
    │   ├── multiagent_simple_env.py
    │   ├── tests
    │   │   └── continuous_wrappers_test.py
    │   └── vector_env.py
    ├── remote
    │   ├── __init__.py
    │   ├── client.py
    │   ├── cluster_monitor.py
    │   ├── communication.py
    │   ├── compatible_trick.py
    │   ├── cpu_resource.py
    │   ├── exceptions.py
    │   ├── future_mode
    │   │   ├── __init__.py
    │   │   ├── future_object.py
    │   │   ├── proxy_wrapper_nowait.py
    │   │   └── tests
    │   │   │   ├── future_mode_cluster_2_test.py
    │   │   │   ├── future_mode_cluster_test.py
    │   │   │   ├── future_mode_get_set_attribute_2_test.py
    │   │   │   ├── future_mode_get_set_attribute_3_test.py
    │   │   │   ├── future_mode_get_set_attribute_test.py
    │   │   │   ├── future_object_test.py
    │   │   │   ├── gpu
    │   │   │       └── future_mode_cluster_test.py
    │   │   │   └── proxy_wrapper_nowait_test.py
    │   ├── gpu_resource.py
    │   ├── grpc_heartbeat
    │   │   ├── __init__.py
    │   │   ├── heartbeat.proto
    │   │   ├── heartbeat_client.py
    │   │   ├── heartbeat_pb2.py
    │   │   ├── heartbeat_pb2_grpc.py
    │   │   ├── heartbeat_server.py
    │   │   └── tests
    │   │   │   ├── heartbeat_client_arguments_test.py
    │   │   │   ├── heartbeat_server_arguments_test.py
    │   │   │   └── heartbeat_test.py
    │   ├── job.py
    │   ├── log_server.py
    │   ├── master.py
    │   ├── message.py
    │   ├── monitor.py
    │   ├── proxy_wrapper.py
    │   ├── remote_class_serialization.py
    │   ├── remote_constants.py
    │   ├── remote_decorator.py
    │   ├── remote_wrapper.py
    │   ├── scripts.py
    │   ├── start.py
    │   ├── static
    │   │   ├── css
    │   │   │   └── bootstrap-parl.min.css
    │   │   ├── favicon.ico
    │   │   ├── js
    │   │   │   ├── ansi_up.js
    │   │   │   ├── bootstrap-table.min.js
    │   │   │   ├── echarts.min.js
    │   │   │   ├── gpu-parl.js
    │   │   │   ├── jquery.ajax-cross-origin.min.js
    │   │   │   ├── jquery.min.js
    │   │   │   └── parl.js
    │   │   └── logo.png
    │   ├── status.py
    │   ├── templates
    │   │   ├── clients.html
    │   │   ├── gpu-workers.html
    │   │   ├── jobs.html
    │   │   └── workers.html
    │   ├── test_utils.py
    │   ├── tests
    │   │   ├── actor_max_memory_test.py
    │   │   ├── actor_status_test.py
    │   │   ├── actor_status_wait_mode_test.py
    │   │   ├── client_not_init_test.py
    │   │   ├── cluster_2_test.py
    │   │   ├── cluster_3_test.py
    │   │   ├── cluster_monitor_2_test.py
    │   │   ├── cluster_monitor_3_test.py
    │   │   ├── cluster_monitor_test.py
    │   │   ├── cluster_notebook_2_test.py
    │   │   ├── cluster_notebook_test.py
    │   │   ├── cluster_status_test.py
    │   │   ├── cluster_test.py
    │   │   ├── communication_test.py
    │   │   ├── get_set_attribute_2_test.py
    │   │   ├── get_set_attribute_3_test.py
    │   │   ├── get_set_attribute_notebook_test.py
    │   │   ├── get_set_attribute_test.py
    │   │   ├── gpu
    │   │   │   ├── cluster_test.py
    │   │   │   ├── proxy_wrapper_test.py
    │   │   │   ├── remote_class_test.py
    │   │   │   ├── worker_manager_test.py
    │   │   │   └── worker_test.py
    │   │   ├── locate_remote_file_test_alone.py
    │   │   ├── log_server_test.py
    │   │   ├── mocking_env_test.py
    │   │   ├── multiprocessing
    │   │   │   ├── cluster_multiprocessing_1_test.py
    │   │   │   └── cluster_multiprocessing_2_test.py
    │   │   ├── proxy_wrapper_test.py
    │   │   ├── recursive_actor_test.py
    │   │   ├── remote_class_test.py
    │   │   ├── reset_job_test.py
    │   │   ├── reset_job_test_alone.py
    │   │   ├── rom
    │   │   │   └── pong.bin
    │   │   ├── send_job_test.py
    │   │   ├── simulate_client.py
    │   │   ├── support_RegExp_test.py
    │   │   ├── sync_config_file_test.py
    │   │   ├── test_import_module
    │   │   │   ├── Module2.py
    │   │   │   ├── main_abs_test.py
    │   │   │   ├── main_test_alone.py
    │   │   │   └── subdir
    │   │   │   │   ├── Module.py
    │   │   │   │   └── __init__.py
    │   │   ├── utils_test.py
    │   │   ├── worker_manager_test.py
    │   │   └── worker_test.py
    │   ├── utils.py
    │   ├── worker.py
    │   ├── worker_manager.py
    │   └── zmq_utils.py
    ├── tests
    │   ├── gym.py
    │   └── import_test.py
    └── utils
    │   ├── __init__.py
    │   ├── csv_logger.py
    │   ├── deprecation.py
    │   ├── exceptions.py
    │   ├── globvars.py
    │   ├── logger.py
    │   ├── machine_info.py
    │   ├── np_utils.py
    │   ├── path_utils.py
    │   ├── replay_memory.py
    │   ├── rl_utils.py
    │   ├── scheduler.py
    │   ├── summary.py
    │   ├── tensorboard.py
    │   ├── test_utils.py
    │   ├── tests
    │       ├── csv_logger_test.py
    │       ├── globvar_test.py
    │       ├── logger_test.py
    │       ├── not_import_dl_framework_test.py
    │       ├── not_import_dl_framework_test_torch.py
    │       ├── scheduler_test.py
    │       └── summary_test.py
    │   ├── time_stat.py
    │   ├── utils.py
    │   ├── visualdl.py
    │   └── window_stat.py
├── setup.py
└── test_tipc
    ├── common_func.sh
    ├── configs
        ├── A2C
        │   └── train_infer_python.txt
        ├── CQL
        │   └── train_infer_python.txt
        ├── DDPG
        │   └── train_infer_python.txt
        ├── DQN
        │   └── train_infer_python.txt
        ├── DQN_variant
        │   └── train_infer_python.txt
        ├── ES
        │   └── train_infer_python.txt
        ├── MADDPG
        │   └── train_infer_python.txt
        ├── OAC
        │   └── train_infer_python.txt
        ├── PPO
        │   └── train_infer_python.txt
        ├── QuickStart
        │   └── train_infer_python.txt
        ├── SAC
        │   └── train_infer_python.txt
        └── TD3
        │   └── train_infer_python.txt
    ├── docs
        ├── test_tipc_log.png
        └── test_train_inference_python.md
    ├── prepare.sh
    ├── readme.md
    └── test_train_inference_python.sh


/.github/Aircraft.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/Aircraft.gif


--------------------------------------------------------------------------------
/.github/Breakout.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/Breakout.gif


--------------------------------------------------------------------------------
/.github/Half-Cheetah.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/Half-Cheetah.gif


--------------------------------------------------------------------------------
/.github/PARL-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/PARL-logo.png


--------------------------------------------------------------------------------
/.github/abstractions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/abstractions.png


--------------------------------------------------------------------------------
/.github/decorator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/decorator.png


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | -   repo: https://github.com/Lucas-C/pre-commit-hooks.git
 3 |     sha: v1.0.1
 4 |     hooks:
 5 |     -   id: remove-crlf
 6 |         files: (?!.*third_party)^.*$ | (?!.*book)^.*$
 7 | -   repo: https://github.com/pre-commit/mirrors-yapf.git
 8 |     sha: v0.24.0
 9 |     hooks:
10 |     -   id: yapf
11 |         language_version: python3.7
12 |         args: ['--style={column_limit:120}' ]
13 |         files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
14 | -   repo: https://github.com/pre-commit/pre-commit-hooks
15 |     sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0
16 |     hooks:
17 |     -   id: check-added-large-files
18 |     -   id: check-merge-conflict
19 |     -   id: check-symlinks
20 |     -   id: detect-private-key
21 |         files: (?!.*third_party)^.*$ | (?!.*book)^.*$
22 |     -   id: end-of-file-fixer
23 | -   repo: local
24 |     hooks:
25 |     -   id: copyright_checker
26 |         name: copyright_checker
27 |         entry: python ./.copyright.hook
28 |         language: system
29 |         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
30 |         exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
31 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | formats: []
 3 | sphinx:
 4 |   configuration: docs/conf.py
 5 | python:
 6 |    version: 3.8
 7 |    install:
 8 |      - requirements: docs/requirements.txt
 9 |      - method: setuptools
10 |        path: .
11 | 


--------------------------------------------------------------------------------
/.teamcity/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ### [2019-07-09] 1.1-cuda9.0-cudnn7-docs:
 2 | add unit test for compiling the docs
 3 |   - add an environment `docs` in anaconda
 4 | 
 5 | ### [2020-07-16] cuda9.0-cudnn7-v3:
 6 | add python3.8 env
 7 | 
 8 | ### [2022-11-22] 2.1-cuda10.1-v0:
 9 | add python3.9 env, remove python2.7 env
10 | 
11 | ### [2022-11-23] 2.1-cuda10.1-v0.1:
12 | add paddlepaddle_gpu-2.3.1-cp39-cp39-manylinux1_x86_64.whl
13 | 
14 | ### [2022-11-25] 2.1-cuda10.1-v0.2:
15 | add test_example env
16 | 


--------------------------------------------------------------------------------
/.teamcity/Dockerfile:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | # A dev image based on paddle production image
17 | 
18 | FROM parl/parl-test:2.1-cuda10.1-v0.2
19 | 
20 | RUN apt-get update && apt-get install -y libgflags-dev libgoogle-glog-dev libomp-dev unzip
21 | RUN apt-get update && apt-get install -y libgtest-dev && cd /usr/src/gtest && mkdir build \
22 | 	&& cd build && cmake .. && make  && cp libgtest*.a /usr/local/lib
23 | 
24 | EXPOSE 22
25 | 


--------------------------------------------------------------------------------
/.teamcity/requirements.txt:
--------------------------------------------------------------------------------
1 | # requirements for unittest
2 | rarfile==3.1
3 | opencv-python<=4.3.0.34;python_version>="3"
4 | opencv-python==4.2.0.32;python_version<"3"
5 | gym
6 | details
7 | parameterized
8 | 


--------------------------------------------------------------------------------
/.teamcity/requirements_fluid.txt:
--------------------------------------------------------------------------------
1 | # requirements for paddle 1.8.5 unittest
2 | gym
3 | details
4 | parameterized
5 | paddlepaddle-gpu==1.8.5.post97
6 | 


--------------------------------------------------------------------------------
/.teamcity/requirements_torch.txt:
--------------------------------------------------------------------------------
1 | # requirements for torch unittest
2 | gym
3 | details
4 | parameterized
5 | 


--------------------------------------------------------------------------------
/.teamcity/windows_requirements_fluid.txt:
--------------------------------------------------------------------------------
1 | # requirements for windows unittest
2 | gym
3 | details
4 | parameterized
5 | paddlepaddle==1.8.5
6 | 


--------------------------------------------------------------------------------
/.teamcity/windows_requirements_paddle.txt:
--------------------------------------------------------------------------------
1 | # requirements for paddle 2.0 unittest
2 | gym
3 | details
4 | parameterized
5 | paddlepaddle>=2.0.0
6 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include parl/remote/static/logo.png
2 | include parl/remote/static/favicon.ico
3 | recursive-include parl/remote/templates *.html
4 | recursive-include parl/remote/static/css *.css
5 | recursive-include parl/remote/static/js *.js
6 | 


--------------------------------------------------------------------------------
/benchmark/fluid/A2C/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/A2C/learning_curve.png


--------------------------------------------------------------------------------
/benchmark/fluid/A2C/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/A2C/result.png


--------------------------------------------------------------------------------
/benchmark/fluid/DDPG/.benchmark/DDPG_HalfCheetah-v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DDPG/.benchmark/DDPG_HalfCheetah-v2.png


--------------------------------------------------------------------------------
/benchmark/fluid/DDPG/.benchmark/DDPG_Hopper-v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DDPG/.benchmark/DDPG_Hopper-v2.png


--------------------------------------------------------------------------------
/benchmark/fluid/DDPG/.benchmark/DDPG_Humanoid-v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DDPG/.benchmark/DDPG_Humanoid-v2.png


--------------------------------------------------------------------------------
/benchmark/fluid/DDPG/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce DDPG with PARL
 2 | Based on PARL, we have reproduced the DDPG deep reinforcement learning algorithm, reaching the same level of performance as the paper on Mujoco benchmarks.
 3 | 
 4 | > Paper: DDPG in [Continuous control with deep reinforcement learning](https://arxiv.org/abs/1509.02971)
 5 | 
 6 | ### Mujoco environments
 7 | Please see [here](https://github.com/openai/mujoco-py) to learn more about the Mujoco environments.
 8 | 
 9 | ### Benchmark result
10 | 
11 | <img src=".benchmark/DDPG_HalfCheetah-v2.png" width = "400" height ="300" alt="DDPG_HalfCheetah-v2"/> <img src=".benchmark/DDPG_Humanoid-v2.png" width = "400" height ="300" alt="DDPG_Humanoid-v2"/>  
12 | <img src=".benchmark/DDPG_Hopper-v2.png" width = "400" height ="300" alt="DDPG_Hopper-v2"/>
13 | 
14 | ## How to use
15 | ### Dependencies:
16 | + python3.5+
17 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle)
18 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL)
19 | + gym
20 | + tqdm
21 | + mujoco-py>=1.50.1.0
22 | 
23 | ### Start Training:
24 | ```
25 | # To train an agent for the HalfCheetah-v2 environment
26 | python train.py
27 | 
28 | # To train on another environment
29 | # python train.py --env [ENV_NAME]
30 | ```
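
The core trick DDPG adds on top of deterministic policy gradients is a pair of slowly-tracking target networks. Below is a minimal numpy sketch of that soft update, illustrative only (the `soft_update` helper is hypothetical, not code from this directory):

```python
# Illustrative sketch of DDPG's soft target-network update,
# theta_target <- tau * theta + (1 - tau) * theta_target  (Lillicrap et al., 2015).
# `soft_update` is a hypothetical helper, not part of this example's code.
import numpy as np

def soft_update(target_params, source_params, tau=0.005):
    return [tau * s + (1.0 - tau) * t for t, s in zip(target_params, source_params)]

critic = [np.random.randn(4, 4), np.random.randn(4)]      # toy weight tensors
target_critic = [np.zeros_like(w) for w in critic]
for _ in range(2000):                                     # repeated blending slowly
    target_critic = soft_update(target_critic, critic)    # tracks the live network
```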


--------------------------------------------------------------------------------
/benchmark/fluid/DQN/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce DQN with PARL
 2 | Based on PARL, we provide a simple demonstration of DQN.
 3 | 
 4 | + Paper: DQN in [Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html)
 5 | 
 6 | ### Result
 7 | 
 8 | Performance of DQN playing CartPole-v0
 9 | 
10 | <p align="left">
11 | <img src="../QuickStart/performance.gif" alt="result" height="175"/>
12 | <img src="cartpole.jpg" alt="result" height="175"/>
13 | </p>
14 | 
15 | ## How to use
16 | ### Dependencies:
17 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle)
18 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL)
19 | + gym
20 | + tqdm
21 | 
22 | 
23 | ### Start Training:
24 | ```
25 | # To train a model for the CartPole-v0 environment
26 | python train.py
27 | ```
28 | 
29 | ## DQN-Variants
30 | 
31 | For DQN variants such as Double DQN and Dueling DQN, please check [here](https://github.com/PaddlePaddle/PARL/tree/develop/benchmark/fluid/DQN_variant)
32 | 
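
For readers new to DQN, the quantity the network regresses toward is the one-step Bellman target; here is a small self-contained sketch of that computation (illustrative only, not the code in train.py):

```python
# Illustrative one-step Bellman target used by DQN (Mnih et al., 2015):
# target = r + gamma * max_a' Q_target(s', a'), with bootstrapping cut off
# at episode ends. Not the code in this directory.
import numpy as np

def dqn_target(reward, next_q_values, terminal, gamma=0.99):
    return reward + gamma * (1.0 - terminal) * next_q_values.max(axis=-1)

rewards = np.array([1.0, 0.0])
next_q = np.array([[0.2, 0.5], [0.1, 0.3]])  # Q_target(s', .) for a batch of 2
done = np.array([0.0, 1.0])                  # second transition ends its episode
print(dqn_target(rewards, next_q, done))     # -> [1.495 0.   ]
```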


--------------------------------------------------------------------------------
/benchmark/fluid/DQN/cartpole.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN/cartpole.jpg


--------------------------------------------------------------------------------
/benchmark/fluid/DQN/cartpole_model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import paddle.fluid as fluid
16 | import parl
17 | from parl import layers
18 | 
19 | 
20 | class CartpoleModel(parl.Model):
21 |     def __init__(self, act_dim):
22 |         hid1_size = 128
23 |         hid2_size = 128
24 |         self.fc1 = layers.fc(size=hid1_size, act='relu')
25 |         self.fc2 = layers.fc(size=hid2_size, act='relu')
26 |         self.fc3 = layers.fc(size=act_dim, act=None)
27 | 
28 |     def value(self, obs):
29 |         h1 = self.fc1(obs)
30 |         h2 = self.fc2(h1)
31 |         Q = self.fc3(h2)
32 |         return Q
33 | 
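
For orientation, a rough sketch of how this model slots into the rest of the example; the exact keyword arguments of the PARL 1.x fluid `DQN` algorithm and of `CartpoleAgent` are assumptions here, so consult train.py in this directory for the authoritative wiring:

```python
# Sketch under assumptions: PARL 1.x fluid-style wiring. The keyword names
# (act_dim, gamma, lr, obs_dim) are assumed, not quoted from train.py.
import parl
from cartpole_model import CartpoleModel
from cartpole_agent import CartpoleAgent  # defined alongside this file

ACT_DIM = 2   # CartPole-v0 has two discrete actions
model = CartpoleModel(act_dim=ACT_DIM)
algorithm = parl.algorithms.DQN(model, act_dim=ACT_DIM, gamma=0.99, lr=1e-3)
agent = CartpoleAgent(algorithm, obs_dim=4, act_dim=ACT_DIM)
```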


--------------------------------------------------------------------------------
/benchmark/fluid/DQN/train_on_xpu.md:
--------------------------------------------------------------------------------
 1 | ## Running DQN on XPU
 2 | We provide a simple demonstration of running DQN on XPU.
 3 | 
 4 | + Paper: DQN in [Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html)
 5 | 
 6 | ### Result
 7 | 
 8 | Performance of DQN playing CartPole-v0
 9 | 
10 | <p align="left">
11 | <img src="../QuickStart/performance.gif" alt="result" height="175"/>
12 | <img src="cartpole.jpg" alt="result" height="175"/>
13 | </p>
14 | 
15 | ## How to use
16 | ### Dependencies:
17 | + [paddlepaddle>=2.0](https://github.com/PaddlePaddle/Paddle)
18 | + [parl](https://github.com/PaddlePaddle/PARL)
19 | + gym
20 | + tqdm
21 | 
22 | 
23 | ### Using XPU
24 | To use an XPU, set the environment variable FLAGS_selected_xpus to the index of your XPU card:
25 | ```
26 | export FLAGS_selected_xpus=0
27 | ```
28 | 
29 | ### Start Training:
30 | ```
32 | # To train a model for the CartPole-v0 environment
32 | python train_with_xpu.py
33 | ```
34 | 


--------------------------------------------------------------------------------
/benchmark/fluid/DQN_variant/.benchmark/merge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN_variant/.benchmark/merge.png


--------------------------------------------------------------------------------
/benchmark/fluid/DQN_variant/.benchmark/table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN_variant/.benchmark/table.png


--------------------------------------------------------------------------------
/benchmark/fluid/DQN_variant/rom_files/battle_zone.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN_variant/rom_files/battle_zone.bin


--------------------------------------------------------------------------------
/benchmark/fluid/DQN_variant/rom_files/breakout.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN_variant/rom_files/breakout.bin


--------------------------------------------------------------------------------
/benchmark/fluid/DQN_variant/rom_files/pong.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN_variant/rom_files/pong.bin


--------------------------------------------------------------------------------
/benchmark/fluid/DQN_variant/utils.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import cv2
16 | from atari import AtariPlayer
17 | from atari_wrapper import FrameStack, MapState, FireResetEnv
18 | 
19 | 
20 | def get_player(rom,
21 |                image_size,
22 |                viz=False,
23 |                train=False,
24 |                frame_skip=1,
25 |                context_len=1):
26 |     env = AtariPlayer(
27 |         rom,
28 |         frame_skip=frame_skip,
29 |         viz=viz,
30 |         live_lost_as_eoe=train,
31 |         max_num_frames=60000)
32 |     env = FireResetEnv(env)
33 |     env = MapState(env, lambda im: cv2.resize(im, image_size))
34 |     if not train:
35 |         # during training, frame context is handled by the experience replay buffer
36 |         env = FrameStack(env, context_len)
37 |     return env
38 | 
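
A hedged usage sketch of `get_player`; the concrete values (84x84 frames, frame skip 4, 4-frame context) follow the standard Atari-DQN preprocessing and are assumptions here rather than quotes from this example's train.py:

```python
# Assumed usage of get_player; the constants mirror standard Atari-DQN
# preprocessing (84x84 frames, frame skip 4, context length 4) and are not
# quoted from train.py.
from utils import get_player

rom = 'rom_files/pong.bin'
train_env = get_player(rom, image_size=(84, 84), train=True, frame_skip=4)
eval_env = get_player(rom, image_size=(84, 84), frame_skip=4, context_len=4)
# With train=True a lost life ends the episode; at eval time FrameStack adds
# the 4-frame context that the replay buffer supplies during training.
```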


--------------------------------------------------------------------------------
/benchmark/fluid/ES/es.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import parl
16 | 
17 | __all__ = ['ES']
18 | 
19 | 
20 | class ES(parl.Algorithm):
21 |     def __init__(self, model):
22 |         """ES algorithm.
23 |         
24 |         Since the parameters of the model are updated at the numpy level, a `learn` function is
25 |         not needed in this algorithm.
26 | 
27 |         Args:
28 |             model(`parl.Model`): policy model of ES algorithm.
29 |         """
30 |         self.model = model
31 | 
32 |     def predict(self, obs):
33 |         """Use the policy model to predict actions of observations.
34 | 
35 |         Args:
36 |             obs(layers.data):  data layer of observations.
37 | 
38 |         Returns:
39 |             tensor of predicted actions.
40 |         """
41 |         return self.model(obs)
42 | 
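
Because learning happens outside this class at the numpy level, the outer loop is easy to sketch; the following is an illustrative Salimans-style ES update, not the code in train.py:

```python
# Illustrative ES outer loop: perturb flat weights with Gaussian noise, score
# each perturbation, and move along the reward-weighted noise. Not train.py.
import numpy as np

def es_step(theta, evaluate, pop_size=50, sigma=0.1, stepsize=0.01):
    noise = np.random.randn(pop_size, theta.size)   # one perturbation per rollout
    rewards = np.array([evaluate(theta + sigma * n) for n in noise])
    advantages = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
    return theta + stepsize * noise.T @ advantages / (pop_size * sigma)

theta = np.zeros(3)
for _ in range(300):  # toy objective with optimum at [1, 1, 1]
    theta = es_step(theta, lambda w: -np.sum((w - 1.0) ** 2))
print(theta)  # close to [1, 1, 1], within the noise of the estimator
```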


--------------------------------------------------------------------------------
/benchmark/fluid/ES/es_config.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | config = {
16 |     #==========  remote config ==========
17 |     'master_address': 'localhost:8037',
18 | 
19 |     #==========  env config ==========
20 |     'env_name': 'Humanoid-v1',
21 | 
22 |     #==========  actor config ==========
23 |     'actor_num': 96,
24 |     'action_noise_std': 0.01,
25 |     'min_task_runtime': 0.2,
26 |     'eval_prob': 0.003,
27 |     'filter_update_prob': 0.01,
28 | 
29 |     #==========  learner config ==========
30 |     'stepsize': 0.01,
31 |     'min_episodes_per_batch': 1000,
32 |     'min_steps_per_batch': 10000,
33 |     'noise_size': 200000000,
34 |     'noise_stdev': 0.02,
35 |     'l2_coeff': 0.005,
36 |     'report_window_size': 10,
37 | }
38 | 


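For orientation, a minimal sketch (not from this file) of how the remote part of such a config is typically consumed: the learner joins the PARL cluster at `master_address` and then creates `actor_num` remote actors; the `Actor` class referenced in the comment lives in `actor.py`.

```python
import parl

# a sketch, assuming the config dict above and the Actor class from actor.py
parl.connect(config['master_address'])          # join the PARL cluster
# actors = [Actor(config) for _ in range(config['actor_num'])]
```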
--------------------------------------------------------------------------------
/benchmark/fluid/ES/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/ES/learning_curve.png


--------------------------------------------------------------------------------
/benchmark/fluid/ES/mujoco_model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import paddle.fluid as fluid
16 | import parl
17 | from parl import layers
18 | 
19 | 
20 | class MujocoModel(parl.Model):
21 |     def __init__(self, act_dim):
22 |         hid1_size = 256
23 |         hid2_size = 256
24 | 
25 |         self.fc1 = layers.fc(size=hid1_size, act='tanh')
26 |         self.fc2 = layers.fc(size=hid2_size, act='tanh')
27 |         self.fc3 = layers.fc(size=act_dim)
28 | 
29 |     def forward(self, obs):
30 |         hid1 = self.fc1(obs)
31 |         hid2 = self.fc2(hid1)
32 |         means = self.fc3(hid2)
33 |         return means
34 | 


--------------------------------------------------------------------------------
/benchmark/fluid/ES/noise.py:
--------------------------------------------------------------------------------
 1 | # Third party code
 2 | #
 3 | # The following code are copied or modified from:
 4 | # https://github.com/ray-project/ray/blob/master/python/ray/rllib/utils/filter.py
 5 | 
 6 | import numpy as np
 7 | 
 8 | 
 9 | class SharedNoiseTable(object):
10 |     """Shared noise table used by learner and actor.
11 | 
12 |     The learner and actors create identical noise tables by passing the same seed.
13 |     With identical tables, they can communicate noise by table index instead of
14 |     by numpy arrays of noise values.
15 |     """
16 | 
17 |     def __init__(self, noise_size, seed=1024):
18 |         self.noise_size = noise_size
19 |         self.seed = seed
20 |         self.noise = self._create_noise()
21 | 
22 |     def _create_noise(self):
23 |         noise = np.random.RandomState(self.seed).randn(self.noise_size).astype(
24 |             np.float32)
25 |         return noise
26 | 
27 |     def get(self, i, dim):
28 |         return self.noise[i:i + dim]
29 | 
30 |     def sample_index(self, dim):
31 |         return np.random.randint(0, len(self.noise) - dim + 1)
32 | 


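A small illustration (not part of the repo) of the index-based protocol the docstring describes: two processes that build the table with the same seed can exchange a single integer instead of a noise vector.

```python
import numpy as np

# both sides build identical tables because the default seed matches
learner_table = SharedNoiseTable(noise_size=1000)
actor_table = SharedNoiseTable(noise_size=1000)

dim = 8                                    # size of the parameter perturbation
idx = actor_table.sample_index(dim)        # actor picks a random slice
noise = actor_table.get(idx, dim)          # actor perturbs its policy with it

# the actor only reports `idx`; the learner recovers the identical noise
assert np.allclose(noise, learner_table.get(idx, dim))
```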
--------------------------------------------------------------------------------
/benchmark/fluid/GA3C/.benchmark/GA3C_BeamRider.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/GA3C/.benchmark/GA3C_BeamRider.jpg


--------------------------------------------------------------------------------
/benchmark/fluid/GA3C/.benchmark/GA3C_Breakout.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/GA3C/.benchmark/GA3C_Breakout.jpg


--------------------------------------------------------------------------------
/benchmark/fluid/GA3C/.benchmark/GA3C_Pong.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/GA3C/.benchmark/GA3C_Pong.jpg


--------------------------------------------------------------------------------
/benchmark/fluid/GA3C/.benchmark/GA3C_Qbert.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/GA3C/.benchmark/GA3C_Qbert.jpg


--------------------------------------------------------------------------------
/benchmark/fluid/GA3C/.benchmark/GA3C_SpaceInvaders.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/GA3C/.benchmark/GA3C_SpaceInvaders.jpg


--------------------------------------------------------------------------------
/benchmark/fluid/IMPALA/.benchmark/IMPALA_BeamRider.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/IMPALA/.benchmark/IMPALA_BeamRider.jpg


--------------------------------------------------------------------------------
/benchmark/fluid/IMPALA/.benchmark/IMPALA_Breakout.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/IMPALA/.benchmark/IMPALA_Breakout.jpg


--------------------------------------------------------------------------------
/benchmark/fluid/IMPALA/.benchmark/IMPALA_Pong.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/IMPALA/.benchmark/IMPALA_Pong.jpg


--------------------------------------------------------------------------------
/benchmark/fluid/IMPALA/.benchmark/IMPALA_Qbert.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/IMPALA/.benchmark/IMPALA_Qbert.jpg


--------------------------------------------------------------------------------
/benchmark/fluid/IMPALA/.benchmark/IMPALA_SpaceInvaders.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/IMPALA/.benchmark/IMPALA_SpaceInvaders.jpg


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple.gif


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple.png


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_adversary.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_adversary.gif


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_adversary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_adversary.png


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_crypto.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_crypto.png


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_push.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_push.gif


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_push.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_push.png


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_reference.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_reference.gif


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_reference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_reference.png


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_speaker_listener.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_speaker_listener.gif


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_speaker_listener.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_speaker_listener.png


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_spread.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_spread.gif


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_spread.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_spread.png


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_tag.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_tag.gif


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_tag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_tag.png


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_world_comm.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_world_comm.gif


--------------------------------------------------------------------------------
/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_world_comm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_world_comm.png


--------------------------------------------------------------------------------
/benchmark/fluid/PPO/.benchmark/PPO_HalfCheetah-v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/PPO/.benchmark/PPO_HalfCheetah-v2.png


--------------------------------------------------------------------------------
/benchmark/fluid/PPO/.benchmark/PPO_Hopper-v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/PPO/.benchmark/PPO_Hopper-v2.png


--------------------------------------------------------------------------------
/benchmark/fluid/PPO/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce PPO with PARL
 2 | Based on PARL, the PPO algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks.
 3 | 
 4 | It includes the following approaches:
 5 | + Clipped Surrogate Objective
 6 | + Adaptive KL Penalty Coefficient
 7 | 
 8 | > Paper: PPO in [Proximal Policy Optimization Algorithms](https://arxiv.org/abs/1707.06347)
 9 | 
10 | ### Mujoco games introduction
11 | Please see [here](https://github.com/openai/mujoco-py) to know more about Mujoco games.
12 | 
13 | ### Benchmark result
14 | 
15 | <img src=".benchmark/PPO_HalfCheetah-v2.png" width = "400" height ="300" alt="PPO_HalfCheetah-v2" />  <img src=".benchmark/PPO_Hopper-v2.png" width = "400" height ="300" alt="PPO_Hopper-v2" />  
16 | 
17 | ## How to use
18 | ### Dependencies:
19 | + python3.5+
20 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle)
21 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL)
22 | + gym
23 | + tqdm
24 | + mujoco-py>=1.50.1.0
25 | 
26 | ### Start Training:
27 | ```
28 | # To train an agent for HalfCheetah-v2 game (default: CLIP loss)
29 | python train.py
30 | 
31 | # To train for different game and different loss type
32 | # python train.py --env [ENV_NAME] --loss_type [CLIP|KLPEN]
33 | ```


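For reference, a minimal numpy sketch (not this repo's implementation) of the Clipped Surrogate Objective listed above; `ratio` is pi_new(a|s) / pi_old(a|s) per sample, and `advantage` comes from an advantage estimator such as GAE.

```python
import numpy as np

def clipped_surrogate(ratio, advantage, epsilon=0.2):
    """PPO CLIP objective (to be maximized), following the paper."""
    unclipped = ratio * advantage
    clipped = np.clip(ratio, 1.0 - epsilon, 1.0 + epsilon) * advantage
    return np.minimum(unclipped, clipped).mean()

# toy numbers: two samples with their probability ratios and advantages
objective = clipped_surrogate(np.array([1.1, 0.7]), np.array([0.5, -0.2]))
```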
--------------------------------------------------------------------------------
/benchmark/fluid/Prioritized_DQN/README.md:
--------------------------------------------------------------------------------
 1 | ## Prioritized Experience Replay
 2 | Reproducing paper [Prioritized Experience Replay](https://arxiv.org/abs/1511.05952).
 3 | 
 4 | Prioritized experience replay (PER) develops a framework for prioritizing experience so as to replay important transitions more frequently. There are two variants of prioritization, rank-based and proportional. Our implementation is the proportional variant, which performs better, as reported in the original paper.
 5 | 
 6 | ## Reproduced Results
 7 | Results have been reproduced with [Double DQN](https://arxiv.org/abs/1509.06461v3) on the following three environments:
 8 | 
 9 | <p align="center">
10 |   <img src="result.png"/>
11 | </p>
12 | 
13 | ## How to use
14 | 
15 | ### Dependencies:
16 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle)
17 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL)
18 | + gym[atari]==0.17.2
19 | + atari-py==0.2.6
20 | + tqdm
21 | + [ale_python_interface](https://github.com/mgbellemare/Arcade-Learning-Environment)
22 | 
23 | 
24 | ### Start Training:
25 | Train on BattleZone game:
26 | ```bash
27 | python train.py --rom ./rom_files/battle_zone.bin
28 | ```
29 | 
30 | > To train on more games, you can install more rom files from [here](https://github.com/openai/atari-py/tree/master/atari_py/atari_roms).
31 | 


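As background (a sketch based on the paper, not this repo's code), the proportional variant samples transition i with probability p_i^alpha / sum_k p_k^alpha and corrects the induced bias with importance-sampling weights:

```python
import numpy as np

def proportional_sample(priorities, batch_size, alpha=0.6, beta=0.4):
    """Sample indices with probability priority**alpha (no sum-tree here)
    and return normalized importance-sampling weights, as in the paper."""
    probs = priorities ** alpha
    probs = probs / probs.sum()
    idx = np.random.choice(len(priorities), size=batch_size, p=probs)
    weights = (len(priorities) * probs[idx]) ** (-beta)
    return idx, weights / weights.max()   # normalize weights for stability

idx, w = proportional_sample(np.random.rand(1000) + 1e-6, batch_size=32)
```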
--------------------------------------------------------------------------------
/benchmark/fluid/Prioritized_DQN/atari.py:
--------------------------------------------------------------------------------
1 | ../DQN_variant/atari.py


--------------------------------------------------------------------------------
/benchmark/fluid/Prioritized_DQN/atari_wrapper.py:
--------------------------------------------------------------------------------
1 | ../DQN_variant/atari_wrapper.py


--------------------------------------------------------------------------------
/benchmark/fluid/Prioritized_DQN/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/Prioritized_DQN/result.png


--------------------------------------------------------------------------------
/benchmark/fluid/Prioritized_DQN/rom_files:
--------------------------------------------------------------------------------
1 | ../DQN_variant/rom_files


--------------------------------------------------------------------------------
/benchmark/fluid/Prioritized_DQN/utils.py:
--------------------------------------------------------------------------------
1 | ../DQN_variant/utils.py


--------------------------------------------------------------------------------
/benchmark/fluid/QMIX/README.md:
--------------------------------------------------------------------------------
 1 | ## QMIX based on PARL and PaddlePaddle
 2 | We reproduce QMIX based on **PARL** and **PaddlePaddle**, reaching the same level of indicators as the paper in StarCraft2 benchmarks.
 3 | ### QMIX
 4 | QMIX is a **value-based** multi-agent reinforcement learning algorithm.  
 5 | Learn more about QMIX from: [QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1803.11485)
 6 | ### StarCraft2 Environment
 7 | Paper: [The StarCraft Multi-Agent Challenge](https://arxiv.org/pdf/1902.04043)  
 8 | Github Repositories: [smac](https://github.com/oxwhirl/smac)  
 9 | ## Benchmark Results
10 | <img src="images/paddle-qmix-result.png" width = "1200"  alt="Performance" />  
11 | 
12 | - We trained our model in 5 different scenarios: *"3m", "8m", "2s_3z", "3s_5z"* and *"1c_3s_5z"*.
13 | - The **difficulty** in all scenarios is set to "7" (very difficult).  
14 | - We trained our model 3 times for each scenario.
15 | 
16 | ## How to Use
17 | ### Dependencies
18 | - python3.5+
19 | - [PaddlePaddle==1.8.5](https://github.com/PaddlePaddle/Paddle)
20 | - [parl<2.0.0](https://github.com/PaddlePaddle/PARL)
21 | - [smac](https://github.com/oxwhirl/smac)
22 | 
23 | ### Start Training
24 | 1. Modify the config in `qmix_config.py`.
25 | 2. Start training:
26 |     ```bash
27 |     python train.py
28 |     ```
29 | 3. View the training process with tensorboard:
30 |     ```bash
31 |     tensorboard --logdir ./
32 |     ```
33 | 


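To make the monotonic value function factorisation mentioned above concrete, here is a toy one-layer mixing function (an illustration only, much simpler than the paper's two-layer hypernetwork mixer): per-agent Q values are combined with non-negative, state-conditioned weights, so the joint value is monotonic in every agent's Q value.

```python
import numpy as np

def monotonic_mix(agent_qs, w_state, b_state):
    """Toy QMIX-style mixing: Q_tot = |w(s)| . q + b(s). Taking the absolute
    value of the state-conditioned weights keeps dQ_tot/dQ_a >= 0, the
    monotonicity constraint that makes greedy per-agent actions consistent
    with the greedy joint action."""
    return np.abs(w_state) @ agent_qs + b_state

q_tot = monotonic_mix(np.array([1.0, -0.5, 2.0]),   # per-agent Q values
                      np.array([0.3, -0.7, 1.2]),   # weights from a hypernet
                      0.1)                          # state-dependent bias
```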
--------------------------------------------------------------------------------
/benchmark/fluid/QMIX/images/paddle-qmix-result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/QMIX/images/paddle-qmix-result.png


--------------------------------------------------------------------------------
/benchmark/fluid/QuickStart/README.md:
--------------------------------------------------------------------------------
 1 | ## Quick Start
 2 | Train an agent with PARL to solve the CartPole problem, a classical benchmark in RL.
 3 | 
 4 | ## How to use
 5 | ### Dependencies:
 6 | 
 7 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle)
 8 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL)
 9 | + gym
10 | 
11 | ### Start Training:
12 | ```
13 | # Install dependencies
14 | pip install paddlepaddle  
15 | # Or use CUDA: pip install paddlepaddle-gpu
16 | 
17 | pip install gym
18 | git clone https://github.com/PaddlePaddle/PARL.git
19 | cd PARL
20 | pip install .
21 | 
22 | # Train model
23 | cd examples/QuickStart/
24 | python train.py  
25 | ```
26 | 
27 | ### Expected Result
28 | <img src="performance.gif" width = "300" height ="200" alt="result"/>
29 | 
30 | The agent can get around 200 points in a few minutes.
31 | 


--------------------------------------------------------------------------------
/benchmark/fluid/QuickStart/cartpole_model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import parl
16 | from parl import layers
17 | 
18 | 
19 | class CartpoleModel(parl.Model):
20 |     def __init__(self, act_dim):
21 |         hid1_size = act_dim * 10
23 | 
24 |         self.fc1 = layers.fc(size=hid1_size, act='tanh')
25 |         self.fc2 = layers.fc(size=act_dim, act='softmax')
26 | 
27 |     def forward(self, obs):
28 |         out = self.fc1(obs)
29 |         out = self.fc2(out)
30 |         return out
31 | 


--------------------------------------------------------------------------------
/benchmark/fluid/QuickStart/performance.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/QuickStart/performance.gif


--------------------------------------------------------------------------------
/benchmark/fluid/SAC/.benchmark/merge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/SAC/.benchmark/merge.png


--------------------------------------------------------------------------------
/benchmark/fluid/SAC/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce SAC with PARL
 2 | Based on PARL, the SAC algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks.
 3 | 
 4 | It includes the following approaches:
 5 | + DDPG Style with Stochastic Policy
 6 | + Maximum Entropy
 7 | 
 8 | > Paper: SAC in [Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor](https://arxiv.org/abs/1801.01290)
 9 | 
10 | ### Mujoco games introduction
11 | Please see [here](https://github.com/openai/mujoco-py) to know more about Mujoco games.
12 | 
13 | ### Benchmark result
14 | 
15 | <img src=".benchmark/merge.png" width = "1500" height ="260" alt="Performance" />
16 | 
17 | ## How to use
18 | ### Dependencies:
19 | + python3.5+
20 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle)
21 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL)
22 | + gym
23 | + mujoco-py>=1.50.1.0
24 | 
25 | ### Start Training:
26 | ```
27 | # To train an agent for HalfCheetah-v2 game
28 | python train.py
29 | 
30 | # To train for different games
31 | # python train.py --env [ENV_NAME]
32 | ```


--------------------------------------------------------------------------------
/benchmark/fluid/TD3/.benchmark/merge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/TD3/.benchmark/merge.png


--------------------------------------------------------------------------------
/benchmark/fluid/TD3/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce TD3 with PARL
 2 | Based on PARL, the TD3 algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks.
 3 | 
 4 | It includes the following approaches:
 5 | + Clipped Double Q-learning
 6 | + Target Networks and Delayed Policy Update
 7 | + Target Policy Smoothing Regularization
 8 | 
 9 | > Paper: TD3 in [Addressing Function Approximation Error in Actor-Critic Methods](https://arxiv.org/abs/1802.09477)
10 | 
11 | ### Mujoco games introduction
12 | Please see [here](https://github.com/openai/mujoco-py) to know more about Mujoco games.
13 | 
14 | ### Benchmark result
15 | 
16 | <img src=".benchmark/merge.png" width = "1500" height ="260" alt="Performance" />
17 | 
18 | ## How to use
19 | ### Dependencies:
20 | + python3.5+
21 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle)
22 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL)
23 | + gym
24 | + mujoco-py>=1.50.1.0
25 | 
26 | ### Start Training:
27 | ```
28 | # To train an agent for HalfCheetah-v2 game
29 | python train.py
30 | 
31 | # To train for different games
32 | # python train.py --env [ENV_NAME]
33 | ```


--------------------------------------------------------------------------------
/benchmark/fluid/offline-Q-learning/atari.py:
--------------------------------------------------------------------------------
1 | ../DQN_variant/atari.py


--------------------------------------------------------------------------------
/benchmark/fluid/offline-Q-learning/atari_wrapper.py:
--------------------------------------------------------------------------------
1 | ../DQN_variant/atari_wrapper.py


--------------------------------------------------------------------------------
/benchmark/fluid/offline-Q-learning/rom_files:
--------------------------------------------------------------------------------
1 | ../DQN_variant/rom_files


--------------------------------------------------------------------------------
/benchmark/fluid/offline-Q-learning/utils.py:
--------------------------------------------------------------------------------
1 | ../DQN_variant/utils.py


--------------------------------------------------------------------------------
/benchmark/torch/AlphaZero/.pic/good_moves.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/AlphaZero/.pic/good_moves.png


--------------------------------------------------------------------------------
/benchmark/torch/AlphaZero/.pic/perfect_moves.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/AlphaZero/.pic/perfect_moves.png


--------------------------------------------------------------------------------
/benchmark/torch/AlphaZero/gen_submission.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import sys
16 | import base64
17 | import inspect
18 | import os
19 | 
20 | assert len(sys.argv) == 2, "please specify model path."
21 | model_path = sys.argv[1]
22 | 
23 | with open(model_path, 'rb') as f:
24 |     raw_bytes = f.read()
25 |     encoded_weights = base64.encodebytes(raw_bytes)
26 | 
27 | # encode weights of model to byte string
28 | submission_file = """
29 | import base64
30 | decoded = base64.b64decode({})
31 | 
32 | """.format(encoded_weights)
33 | 
34 | # insert code snippet of loading weights
35 | with open('submission_template.py', 'r') as f:
36 |     submission_file += ''.join(f.readlines())
37 | 
38 | # generate final submission file
39 | with open('submission.py', 'w') as f:
40 |     f.write(submission_file)
41 | 


--------------------------------------------------------------------------------
/benchmark/torch/DT/README.md:
--------------------------------------------------------------------------------
 1 | ## Introduction
 2 | Based on PARL, we provide an implementation of the Decision Transformer, matching the performance reported in the original paper.
 3 | 
 4 | > Paper: [Decision Transformer: Reinforcement
 5 | Learning via Sequence Modeling](https://arxiv.org/abs/2106.01345)
 6 | 
 7 | ### Dataset for RL
 8 | Follow the installation instruction in [D4RL](https://github.com/Farama-Foundation/D4RL) to install D4RL.
 9 | Then run the script in the `data` directory to download the datasets for training.
10 | ```shell
11 | python download_d4rl_datasets.py
12 | ```
13 | 
14 | 
15 | ### Benchmark result
16 | #### 1. Mujoco results
17 | <p align="center">
18 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/DT/torch/mujoco_result.png" alt="mujoco-result"/>
19 | </p>
20 | 
21 | + Each experiment was run three times with different random seeds
22 | 
23 | ## How to use
24 | ### Dependencies:
25 | + [D4RL](https://github.com/Farama-Foundation/D4RL)
26 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
27 | + pytorch
28 | + gym==0.18.3
29 | + mujoco-py==2.0.2.13
30 | + transformers==4.5.1
31 | 
32 | 
33 | ### Training:
34 | 
35 | ```shell
36 | # To train an agent for the `hopper` environment with the `medium` dataset
37 | python train.py --env hopper --dataset medium
38 | 
39 | # To train an agent for the `hopper` environment with the `expert` dataset
40 | python train.py --env hopper --dataset expert
41 | ```
42 | 
43 | 
44 | ### Reference
45 | 
46 | https://github.com/kzl/decision-transformer
47 | 


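As background (a sketch based on the Decision Transformer paper, not this repo's code): the model conditions on returns-to-go, the suffix sums of the reward sequence.

```python
import numpy as np

def returns_to_go(rewards):
    """rtg[t] = rewards[t] + rewards[t+1] + ... for each timestep t."""
    return np.cumsum(rewards[::-1])[::-1]

print(returns_to_go(np.array([1.0, 0.0, 2.0])))   # -> [3. 2. 2.]
```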
--------------------------------------------------------------------------------
/benchmark/torch/ES/es.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import parl
16 | 
17 | __all__ = ['ES']
18 | 
19 | 
20 | class ES(parl.Algorithm):
21 |     def __init__(self, model):
22 |         """ES algorithm.
23 | 
24 |         Since the parameters of the model are updated at the numpy level, a `learn`
25 |         function is not needed in this algorithm.
26 | 
27 |         Args:
28 |             model(`parl.Model`): policy model of ES algorithm.
29 |         """
30 |         self.model = model
31 | 
32 |     def predict(self, obs):
33 |         """Use the policy model to predict actions of observations.
34 | 
35 |         Args:
36 |             obs(layers.data):  data layer of observations.
37 | 
38 |         Returns:
39 |             tensor of predicted actions.
40 |         """
41 | 
42 |         return self.model(obs)
43 | 


--------------------------------------------------------------------------------
/benchmark/torch/ES/es_config.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | config = {
16 |     #==========  remote config ==========
17 |     'master_address': 'localhost:8010',
18 |     #==========  env config ==========
19 |     'env_name': 'Humanoid-v4',
20 |     #==========  actor config ==========
21 |     'actor_num': 24,
22 |     'action_noise_std': 0.01,
23 |     'min_task_runtime': 0.2,
24 |     'eval_prob': 0.003,
25 |     'filter_update_prob': 0.01,
26 | 
27 |     #==========  learner config ==========
28 |     'stepsize': 0.01,
29 |     'train_steps': 200,
30 |     'min_episodes_per_batch': 1000,
31 |     'min_steps_per_batch': 10000,
32 |     'noise_size': 200000000,
33 |     'noise_stdev': 0.02,
34 |     'l2_coeff': 0.005,
35 |     'report_window_size': 10,
36 | }
37 | 


--------------------------------------------------------------------------------
/benchmark/torch/ES/mujoco_model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numpy as np
16 | import torch
17 | import torch.nn as nn
18 | import torch.nn.functional as F
19 | 
20 | import parl
21 | 
22 | 
23 | class MujocoModel(parl.Model):
24 |     def __init__(self, obs_dim, act_dim):
25 |         super(MujocoModel, self).__init__()
26 | 
27 |         hid1_size = 256
28 |         hid2_size = 256
29 |         self.fc1 = nn.Linear(obs_dim, hid1_size)
30 |         self.fc2 = nn.Linear(hid1_size, hid2_size)
31 |         self.fc3 = nn.Linear(hid2_size, act_dim)
32 | 
33 |     def forward(self, obs):
34 |         hid1 = torch.tanh(self.fc1(obs))
35 |         hid2 = torch.tanh(self.fc2(hid1))
36 |         means = self.fc3(hid2)
37 |         return means
38 | 


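A minimal composition sketch (not part of the repo): the `ES` algorithm wrapper from `es.py` above simply forwards observations through this model; the dimensions below are made up for illustration.

```python
import torch

# illustrative dimensions; real values come from the gym environment spaces
model = MujocoModel(obs_dim=376, act_dim=17)
algorithm = ES(model)                  # ES from benchmark/torch/ES/es.py
obs = torch.randn(1, 376)              # a fake observation batch
actions = algorithm.predict(obs)       # -> tensor of shape [1, 17]
```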
--------------------------------------------------------------------------------
/benchmark/torch/ES/noise.py:
--------------------------------------------------------------------------------
 1 | # Third party code
 2 | #
 3 | # The following code are copied or modified from:
 4 | # https://github.com/ray-project/ray/blob/master/python/ray/rllib/utils/filter.py
 5 | 
 6 | import numpy as np
 7 | 
 8 | 
 9 | class SharedNoiseTable(object):
10 |     """Shared noise table used by learner and actor.
11 | 
12 |     The learner and actors create identical noise tables by passing the same seed.
13 |     With identical tables, they can communicate noise by table index instead of
14 |     by numpy arrays of noise values.
15 |     """
16 | 
17 |     def __init__(self, noise_size, seed=1024):
18 |         self.noise_size = noise_size
19 |         self.seed = seed
20 |         self.noise = self._create_noise()
21 | 
22 |     def _create_noise(self):
23 |         noise = np.random.RandomState(self.seed).randn(self.noise_size).astype(
24 |             np.float32)
25 |         return noise
26 | 
27 |     def get(self, i, dim):
28 |         return self.noise[i:i + dim]
29 | 
30 |     def sample_index(self, dim):
31 |         return np.random.randint(0, len(self.noise) - dim + 1)
32 | 


--------------------------------------------------------------------------------
/benchmark/torch/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/README.md:
--------------------------------------------------------------------------------
 1 | ## NeurIPS2020 L2RPN Challenge
 2 | 
 3 | The **PARL** team won first place in both tracks (Robustness Track and Adaptability Track) of the *NeurIPS2020 Learning-to-Run-a-Power-Network* challenge! 
 4 | 
 5 | <p align="center">
 6 | <img src="../../../examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/images/l2rpn.jpeg" alt="PARL" height="300" />
 7 | </p>
 8 | 
 9 | ## Paper Citation
10 | 
11 | If you use our code in your experiments or find it helpful, please consider citing the following paper:
12 | 
13 | <pre>
14 | @inproceedings{Zhou2021ActionSB,
15 |   title={Action Set Based Policy Optimization for Safe Power Grid Management},
16 |   author={Bo Zhou and Hongsheng Zeng and Yuecheng Liu and Kejiao Li and Fan Wang and Hao Tian},
17 |   booktitle={ECML PKDD 2021},
18 |   year={2021}
19 | }
20 | </pre>
21 | 


--------------------------------------------------------------------------------
/benchmark/torch/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/track1/README.md:
--------------------------------------------------------------------------------
 1 | ## Dependencies
 2 | - python3.6
 3 | - [parl==1.3.2](https://github.com/PaddlePaddle/PARL)
 4 | - Pytorch==1.6.0
 5 | - [grid2op==1.2.2](https://github.com/rte-france/Grid2Op)
 6 | - [lightsim2grid==0.2.4](https://github.com/BDonnot/lightsim2grid)
 7 | 
 8 | ## How to evaluate
 9 |   1. Clone the repository.
10 |   2. Download the saved models from online storage service: [Baidu Pan](https://pan.baidu.com/s/14M1ccn72rgE_7X19e94bkQ) (password: `asiv`) or [Google Drive](https://drive.google.com/file/d/1mDE7K__QFHHxWCWIq53egtjPAVC0Jt0k/view?usp=sharing)
11 |   3. Unpack the file:  
12 |     ```
13 |     tar -xvzf saved_files.tar.gz
14 |     ```
15 |   4. Evaluate the result:  
16 |     ```
17 |     python evaluate.py --nb_episode=10
18 |     ```
19 | 


--------------------------------------------------------------------------------
/benchmark/torch/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/track2/README.md:
--------------------------------------------------------------------------------
 1 | ## Dependencies
 2 | - python3.6
 3 | - [parl==1.3.2](https://github.com/PaddlePaddle/PARL)
 4 | - Pytorch==1.6.0
 5 | - [grid2op==1.2.2](https://github.com/rte-france/Grid2Op)
 6 | - [lightsim2grid==0.2.4](https://github.com/BDonnot/lightsim2grid)
 7 | 
 8 | ## How to evaluate
 9 |   1. Clone the repository.
10 |   2. Download the saved models from online storage service: [Baidu Pan](https://pan.baidu.com/s/1qpylN5QJA-h6EcaoUC1sgg) (password: `0r7v`) or [Google Drive](https://drive.google.com/file/d/1FuPz5bEeMSTM9QMR3cpbzH69TLMhklr4/view?usp=sharing)
11 |   3. Unpack the file:  
12 | 	```
13 | 	tar -zxvf saved_files.tar.gz
14 | 	```
15 |   4. Evaluate the result:  
16 | 	```
17 | 	python evaluate.py --nb_episode=10
18 | 	```
19 | 


--------------------------------------------------------------------------------
/benchmark/torch/QuickStart/README.md:
--------------------------------------------------------------------------------
 1 | ## PyTorch benchmark Quick Start
 2 | Train an agent with PARL to solve the CartPole problem, a classical benchmark in RL.
 3 | 
 4 | ## How to use
 5 | ### Dependencies:
 6 | 
 7 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
 8 | + torch
 9 | + gym
10 | 
11 | ### Start Training:
12 | ```
13 | # Install dependencies
14 | pip install torch torchvision gym
15 | 
16 | git clone https://github.com/PaddlePaddle/PARL.git
17 | cd PARL
18 | pip install .
19 | 
20 | # Train model
21 | cd benchmark/torch/QuickStart
22 | python train.py  
23 | ```
24 | 
25 | ### Expected Result
26 | <img src="https://github.com/PaddlePaddle/PARL/blob/develop/examples/QuickStart/performance.gif" width = "300" height ="200" alt="result"/>
27 | 
28 | The agent can get around 200 points in a few minutes.
29 | 


--------------------------------------------------------------------------------
/benchmark/torch/QuickStart/cartpole_model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | import torch.nn as nn
17 | import torch.nn.functional as F
18 | import parl
19 | 
20 | 
21 | class CartpoleModel(parl.Model):
22 |     """ Linear network to solve Cartpole problem.
23 |     
24 |     Args:
25 |         obs_dim (int): Dimension of observation space.
26 |         act_dim (int): Dimension of action space.
27 |     """
28 | 
29 |     def __init__(self, obs_dim, act_dim):
30 |         super(CartpoleModel, self).__init__()
31 |         hid1_size = act_dim * 10
32 |         self.fc1 = nn.Linear(obs_dim, hid1_size)
33 |         self.fc2 = nn.Linear(hid1_size, act_dim)
34 | 
35 |     def forward(self, x):
36 |         out = torch.tanh(self.fc1(x))
37 |         prob = F.softmax(self.fc2(out), dim=-1)
38 |         return prob
39 | 


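Illustrative usage (not from the repo): the model outputs action probabilities, from which an action can be sampled.

```python
import torch

model = CartpoleModel(obs_dim=4, act_dim=2)    # CartPole sizes
obs = torch.randn(1, 4)                        # a fake observation
probs = model(obs)                             # softmax output, shape [1, 2]
action = torch.multinomial(probs, 1).item()    # sample an action index
```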
--------------------------------------------------------------------------------
/benchmark/torch/a2c/.result/result_a2c_torch0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/a2c/.result/result_a2c_torch0.png


--------------------------------------------------------------------------------
/benchmark/torch/a2c/.result/result_a2c_torch1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/a2c/.result/result_a2c_torch1.png


--------------------------------------------------------------------------------
/benchmark/torch/a2c/a2c_config.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | config = {
16 | 
17 |     #==========  remote config ==========
18 |     'master_address': 'localhost:8010',
19 |     #==========  env config ==========
20 |     'env_name': 'PongNoFrameskip-v4',
21 |     'env_dim': 84,
22 | 
23 |     #==========  actor config ==========
24 |     'actor_num': 5,
25 |     'env_num': 5,
26 |     'sample_batch_steps': 20,
27 | 
28 |     #==========  learner config ==========
29 |     'max_sample_steps': int(1e7),
30 |     'gamma': 0.99,
31 |     'lambda': 1.0,
32 | 
33 |     # start learning rate
34 |     'start_lr': 0.001,
35 | 
36 |     # coefficient of policy entropy adjustment schedule: (train_step, coefficient)
37 |     'entropy_coeff_scheduler': [(0, -0.01)],
38 |     'vf_loss_coeff': 0.5,
39 |     'log_metrics_interval_s': 10,
40 |     'learning_rate': 0.001,
41 | }
42 | 


--------------------------------------------------------------------------------
/benchmark/torch/coma/.benchmark/3m_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/coma/.benchmark/3m_result.png


--------------------------------------------------------------------------------
/benchmark/torch/coma/starcraft2/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:9.2-cudnn7-devel-ubuntu16.04
 2 | MAINTAINER Tabish Rashid
 3 | 
 4 | # CUDA includes
 5 | ENV CUDA_PATH /usr/local/cuda
 6 | ENV CUDA_INCLUDE_PATH /usr/local/cuda/include
 7 | ENV CUDA_LIBRARY_PATH /usr/local/cuda/lib64
 8 | 
 9 | # Ubuntu Packages
10 | RUN apt-get update -y && apt-get install software-properties-common -y && \
11 |     add-apt-repository -y multiverse && apt-get update -y && apt-get upgrade -y && \
12 |     apt-get install -y apt-utils nano vim git man build-essential wget sudo && \
13 |     rm -rf /var/lib/apt/lists/*
14 | 
15 | # Install python3 pip3
16 | RUN apt-get update
17 | RUN apt-get -y install python3
18 | RUN apt-get -y install python3-pip
19 | RUN pip3 install --upgrade pip
20 | 
21 | #### -------------------------------------------------------------------
22 | #### install parl
23 | #### -------------------------------------------------------------------
24 | RUN pip3 install parl
25 | 
26 | #### -------------------------------------------------------------------
27 | #### install SMAC
28 | #### -------------------------------------------------------------------
29 | RUN pip3 install git+https://github.com/oxwhirl/smac.git
30 | 
31 | #### -------------------------------------------------------------------
32 | #### install pytorch
33 | #### -------------------------------------------------------------------
34 | RUN pip3 install torch
35 | 
36 | 
37 | ENV SC2PATH /parl/starcraft2/StarCraftII
38 | WORKDIR /parl
39 | 


--------------------------------------------------------------------------------
/benchmark/torch/coma/starcraft2/build_docker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | #### -------------------------------------------------------------------
4 | #### build docker image
5 | #### -------------------------------------------------------------------
6 | echo 'Building Dockerfile with image name parl-starcraft2:1.0'
7 | docker build -t parl-starcraft2:1.0 .
8 | 


--------------------------------------------------------------------------------
/benchmark/torch/ddpg/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce DDPG with PARL
 2 | Based on PARL, the DDPG algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks.
 3 | 
 4 | > Paper: DDPG in [Continuous control with deep reinforcement learning](https://arxiv.org/abs/1509.02971)
 5 | 
 6 | ### Mujoco games introduction
 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download binaries of Mujoco as well as install mujoco-py and get license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco).
 8 | 
 9 | ### Benchmark result
10 | 
11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/DDPG/torch/result.png" width="600" alt="DDPG_results"/>
12 | 
13 | ## How to use
14 | ### Dependencies:
15 | + python3.7+
16 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
17 | + gym>=0.26.0
18 | + torch
19 | + mujoco>=2.2.2
20 | 
21 | ### Start Training:
22 | ```
23 | # To train an agent for HalfCheetah-v4 game
24 | python train.py
25 | 
26 | # To train for other games
27 | # python train.py --env [ENV_NAME]
28 | ```


--------------------------------------------------------------------------------
/benchmark/torch/dqn/.benchmark/dqn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/dqn/.benchmark/dqn.png


--------------------------------------------------------------------------------
/benchmark/torch/maml++/.benchmark/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/maml++/.benchmark/loss.png


--------------------------------------------------------------------------------
/benchmark/torch/maml++/README.md:
--------------------------------------------------------------------------------
 1 | # Regression MAML/MAML++ with PARL
 2 | 
 3 | Implementation of [MAML](https://arxiv.org/abs/1703.03400) and [MAML++](https://arxiv.org/abs/1810.09502) with PyTorch and PARL that works for regression tasks.
 4 | 
 5 | ## Benchmark result
 6 | 
 7 | We follow the regression task setting from [Meta-SGD](https://arxiv.org/pdf/1707.09835.pdf), in which the model learns different sine waves. We train and test the model with 5-shot tasks. The figure below shows the test losses of MAML and MAML++ on 10000 randomly generated sine waves.
 8 | 
 9 | <p align="center">
10 | <img src=".benchmark/loss.png" alt="result"/>
11 | </p>
12 | 
13 | | MAML (from Meta-SGD) | Meta-SGD (from Meta-SGD) | MAML (ours) | MAML++ (ours) |
14 | | --- | --- | --- | --- |
15 | | 1.13&plusmn;0.18 |0.90&plusmn;0.16|  0.93&plusmn;0.02 | 0.34&plusmn;0.01 |
16 | 
17 | ## How to use
18 | 
19 | ### Dependencies:
20 | 
21 | + python>=3.7.0
22 | + pytorch==1.7.1
23 | + parl
24 | 
25 | ### Start Training:
26 | 
27 | ~~~
28 | python3 train.py
29 | ~~~
30 | 


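For context, a sketch (not this repo's code) of the 5-shot sine-wave task distribution described above; the amplitude and phase ranges follow the MAML paper and may differ from this implementation's settings.

```python
import numpy as np

def sample_sine_task(k_shot=5, rng=np.random):
    """Draw one 5-shot regression task: a random sine wave plus k samples."""
    amplitude = rng.uniform(0.1, 5.0)
    phase = rng.uniform(0.0, np.pi)
    x = rng.uniform(-5.0, 5.0, size=(k_shot, 1))
    return x, amplitude * np.sin(x + phase)

x_support, y_support = sample_sine_task()
```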
--------------------------------------------------------------------------------
/benchmark/torch/oac/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce OAC with PARL
 2 | Based on PARL, the OAC algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks.
 3 | 
 4 | > Paper: OAC in [Better Exploration with Optimistic Actor-Critic](https://arxiv.org/abs/1910.12807)
 5 | 
 6 | ### Mujoco games introduction
 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download binaries of Mujoco as well as install mujoco-py and get license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco).
 8 | 
 9 | ### Benchmark result
10 | 
11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/OAC/torch/result.png" width="600" alt="OAC_results"/>
12 | 
13 | + Each experiment was run three times with different seeds
14 | 
15 | ## How to use
16 | ### Dependencies:
17 | + python3.7+
18 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
19 | + gym>=0.26.0
20 | + torch
21 | + mujoco>=2.2.2
22 | 
23 | ### Start Training:
24 | ```
25 | # To train an agent for HalfCheetah-v4 game
26 | python train.py
27 | 
28 | # To train for other games & params
29 | python train.py --env [ENV_NAME] --alpha [float] --beta [float] --delta [float]
30 | ```
31 | 
32 | ### Reference
33 | + [microsoft/oac-explore](https://github.com/microsoft/oac-explore)
34 | 


--------------------------------------------------------------------------------
/benchmark/torch/qmix/README.md:
--------------------------------------------------------------------------------
 1 | ## QMIX based on Pytorch
 2 | We reproduce QMIX based on **PARL** and **PyTorch**, reaching the same level of indicators as the paper in StarCraftII benchmarks.
 3 | ### QMIX
 4 | QMIX is a **value-based** multi-agent reinforcement learning algorithm.  
 5 | See more information about QMIX in: [QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1803.11485)
 6 | ### StarCraftII Environment
 7 | Paper: [The StarCraft Multi-Agent Challenge](https://arxiv.org/pdf/1902.04043)  
 8 | Github Repositories: [smac](https://github.com/oxwhirl/smac)  
 9 | ## Benchmark Results
10 | <img src="images/torch-qmix-result.png" width = "1200"  alt="Performance" />  
11 | 
12 | - We trained our model in 5 different scenarios: *"3m", "8m", "2s_3z", "3s_5z"* and *"1c_3s_5z"*.  
13 | - The **difficulty** in all scenarios is set to "7" (very difficult).  
14 | - We trained the model 3 times for each scenario.
15 | 
16 | ## How to Use
17 | ### Dependencies
18 | - python>=3.6
19 | - [parl](https://github.com/PaddlePaddle/PARL)
20 | - [smac](https://github.com/oxwhirl/smac)
21 | - Pytorch>=1.6.0
22 | 
23 | ### Start Training
24 | 1. Modify the config in `qmix_config.py`.
25 | 2. Start training:
26 |     ```bash
27 |     python train.py
28 |     ```
29 | 3. View the training process with tensorboard:
30 |     ```bash
31 |     tensorboard --logdir ./
32 |     ```
33 | 


--------------------------------------------------------------------------------
/benchmark/torch/qmix/images/torch-qmix-result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/qmix/images/torch-qmix-result.png


--------------------------------------------------------------------------------
/benchmark/torch/qmix/utils.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numpy as np
16 | 
17 | 
18 | class OneHotTransform(object):
19 |     def __init__(self, out_dim):
20 |         self.out_dim = out_dim
21 | 
22 |     def __call__(self, agent_id):
23 |         assert agent_id < self.out_dim
24 |         one_hot_id = np.zeros(self.out_dim, dtype='float32')
25 |         one_hot_id[agent_id] = 1.0
26 |         return one_hot_id
27 | 
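 28 | # Usage sketch (illustrative, not part of the original file):
 29 | #   transform = OneHotTransform(4)
 30 | #   transform(2)  # -> array([0., 0., 1., 0.], dtype=float32)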


--------------------------------------------------------------------------------
/benchmark/torch/sac/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce SAC with PARL
 2 | Based on PARL, we have reproduced the SAC deep reinforcement learning algorithm, reaching the same level of performance as reported in the paper on Mujoco benchmarks.
 3 | 
 4 | > Paper: SAC in [Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor](https://arxiv.org/abs/1801.01290)
 5 | 
 6 | ### Mujoco games introduction
 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco).
 8 | 
 9 | ### Benchmark result
10 | 
11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/SAC/torch/result.png" alt="SAC_results"/>
12 | 
13 | + Each experiment was run three times with different seeds
14 | 
15 | ## How to use
16 | ### Dependencies:
17 | + python3.7+
18 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
19 | + gym>=0.26.0
20 | + torch
21 | + mujoco>=2.2.2
22 | 
23 | ### Start Training:
24 | #### Train
25 | ```
 26 | # To train on HalfCheetah-v4 (default), Hopper-v4, Walker2d-v4, Ant-v4
 27 | # --alpha 0.2 (default)
28 | python train.py --env [ENV_NAME]
29 | 
30 | # To reproduce the performance of Humanoid-v4
31 | python train.py --env Humanoid-v4 --alpha 0.05
32 | 


--------------------------------------------------------------------------------
/benchmark/torch/td3/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce TD3 with PARL
 2 | Based on PARL, we have reproduced the TD3 deep reinforcement learning algorithm, reaching the same level of performance as reported in the paper on Mujoco benchmarks.
 3 | 
 4 | It includes the following approaches:
 5 | + Clipped Double Q-learning
 6 | + Target Networks and Delayed Policy Update
 7 | + Target Policy Smoothing Regularization
 8 | 
 9 | > TD3 in
10 | [Addressing Function Approximation Error in Actor-Critic Methods](https://arxiv.org/abs/1802.09477)
11 | 
12 | ### Mujoco games introduction
 13 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco).
14 | 
15 | ### Benchmark result
16 | 
17 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/TD3/torch/result.png" alt="Performance" />
18 | 
19 | ## How to use
20 | ### Dependencies:
21 | + python3.7+
22 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
23 | + gym>=0.26.0
24 | + torch
25 | + mujoco>=2.2.2
26 | 
27 | ### Start Training:
28 | ```
29 | # To train an agent for HalfCheetah-v4 game
30 | python train.py
31 | 
 32 | # To train on a different game
 33 | # python train.py --env [ENV_NAME]
34 | 


--------------------------------------------------------------------------------
/docs/EvoKit/overview.rst:
--------------------------------------------------------------------------------
 1 | Overview
 2 | ------------------
 3 | 
 4 | ``EvoKit`` is an evolutionary-algorithm library that bundles multiple evolutionary algorithms and is compatible with a wide range of inference frameworks, with a focus on **fast online deployment and validation**.
 5 | 
 6 | .. image:: ../../evo_kit/DeepES.gif
 7 |   :align: center
 8 |   :width: 400px
 9 | 
10 | Features
11 | #########
12 | 
13 | **1. Multiple evolutionary algorithms.** Supports Gaussian sampling, CMA, GA and other algorithms, with more being added continuously.
14 | 
15 | **2. Mainstream optimizers.** Supports SGD/Momentum/Adam and other popular optimizers, effectively improving convergence efficiency.
16 | 
17 | **3. One-stop deployment.** Integrates the online-sampling and offline-update workflow and provides Bcloud/CMake builds to help launch quickly.
18 | 
19 | **4. Compatible with all major deep learning frameworks.** Hand-written networks as well as networks built with paddle/lego/Torch are all supported by EvoKit.
20 | 
21 | **5. Synchronous/asynchronous updates.** Supports asynchronous updates from multiple sampling models / multiple batches of sampling data, fitting real production scenarios.
22 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SOURCEDIR     = .
 8 | BUILDDIR      = build
 9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help Makefile
15 | 
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
20 | 


--------------------------------------------------------------------------------
/docs/api_docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. PARL_docs documentation master file, created by
 2 |    sphinx-quickstart on Mon Apr 22 11:12:25 2019.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | PARL Documentation
 7 | =====================================
 8 | 
 9 | .. toctree::
10 |   :maxdepth: 1
11 | 
12 |   utils
13 | 


--------------------------------------------------------------------------------
/docs/api_docs/utils.rst:
--------------------------------------------------------------------------------
1 | parl.Model
2 | --------------------
3 | .. automodule:: parl.framework.model_base
4 |     :members:
5 |     :undoc-members:
6 |     :show-inheritance:
7 | 


--------------------------------------------------------------------------------
/docs/apis/agent.rst:
--------------------------------------------------------------------------------
1 | parl.Agent
2 | ----------
3 | .. autoclass:: parl.core.paddle.agent.Agent
4 |     :members:
5 | 
6 | 


--------------------------------------------------------------------------------
/docs/apis/algorithm.rst:
--------------------------------------------------------------------------------
1 | parl.Algorithm
2 | --------------
3 | .. autoclass:: parl.core.paddle.algorithm.Algorithm
4 |     :members:
5 | 
6 | 


--------------------------------------------------------------------------------
/docs/apis/connect.rst:
--------------------------------------------------------------------------------
1 | parl.connect
2 | ------------
3 | .. autoclass:: parl.remote.client.connect
4 |     :members:
5 | 
6 | 


--------------------------------------------------------------------------------
/docs/apis/model.rst:
--------------------------------------------------------------------------------
1 | parl.Model
2 | ----------
3 | .. autoclass:: parl.core.paddle.model.Model
4 |     :members:
5 | 
6 | 


--------------------------------------------------------------------------------
/docs/apis/remote_class.rst:
--------------------------------------------------------------------------------
1 | parl.remote_class
2 | -----------------
3 | .. autoclass:: parl.remote.remote_decorator.remote_class
4 |     :members:
5 | 
6 | 


--------------------------------------------------------------------------------
/docs/basic_structure/agent.rst:
--------------------------------------------------------------------------------
 1 | Agent (*Generate Data Flow*)
 2 | ===============================
 3 | 
 4 | Methods
 5 | --------
 6 | 1. __init__(self, algorithm, gpu_id=None)
 7 | 
 8 |     Call build_program here and run initialization for default_startup_program.
 9 | 
10 | 2. build_program(self)
11 | 
 12 |     Use define_predict and define_learn in Algorithm to build the training program and the prediction program. This will be called
 13 |     by the __init__ method of class Agent.
14 | 
15 | 3. predict(self, obs)
16 | 
 17 |     Predict the action given the current observation of the environment. Note that this function only performs prediction and does not do any exploration.
 18 |     To explore in the action space, implement your exploration process in the `sample` function below.
 19 |     This function is typically used during testing.
20 | 
21 | 4. sample(self, obs)
22 | 
 23 |     Predict the action given the current observation of the environment.
 24 |     Additionally, noise is added to the action here to explore new trajectories.
 25 |     This function is typically used during training.
26 | 
27 | 5. learn(self, obs, action, reward, next_obs, terminal)
28 | 
 29 |     Pass data to the training program to update the model. This method is the training interface of the Agent.
30 | 
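 31 | An Example
 32 | -----------
 33 | A minimal sketch of how ``sample`` can build on ``predict`` (the agent name and noise scale here are illustrative, not part of PARL):
 34 | 
 35 | .. code-block:: python
 36 |     :linenos:
 37 | 
 38 |     import numpy as np
 39 | 
 40 |     class MujocoAgent(Agent):
 41 |         def sample(self, obs):
 42 |             act = self.predict(obs)
 43 |             # add Gaussian noise for exploration, then clip to the action range
 44 |             act = np.clip(act + np.random.normal(0.0, 0.1, size=act.shape), -1.0, 1.0)
 45 |             return act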


--------------------------------------------------------------------------------
/docs/basic_structure/algorithm.rst:
--------------------------------------------------------------------------------
 1 | Algorithm (*Backward Part*)
 2 | =============================
 3 | 
 4 | Methods
 5 | ---------
 6 | 1. define_predict(self, obs)
 7 | 
 8 |     Use the method policy() of the Model to predict action probabilities.
 9 | 
10 | 2. define_learn(self, obs, action, reward, next_obs, terminal)
11 | 
12 |     Define loss function and optimizer here to update the policy model.
13 | 
14 | An Example
15 | -----------
16 | 
17 | 
18 | 
19 | .. code-block:: python
20 |     :linenos:
21 | 
22 |     # From https://github.com/PaddlePaddle/PARL/blob/develop/parl/algorithms/policy_gradient.py
23 | 
 24 |     class PolicyGradient(Algorithm):
 25 |         def __init__(self, model, hyperparas):
 26 |             Algorithm.__init__(self, model, hyperparas)
 27 |             self.model = model
 28 |             self.lr = hyperparas['lr']
 29 | 
 30 |         def define_predict(self, obs):
 31 |             """ use policy model self.model to predict the action probability
 32 |             """
 33 |             return self.model.policy(obs)
 34 | 
 35 |         def define_learn(self, obs, action, reward):
 36 |             """ update policy model self.model with policy gradient algorithm
 37 |             """
 38 |             act_prob = self.model.policy(obs)
 39 |             log_prob = layers.cross_entropy(act_prob, action)
 40 |             cost = log_prob * reward
 41 |             cost = layers.reduce_mean(cost)
 42 |             optimizer = fluid.optimizer.Adam(self.lr)
 43 |             optimizer.minimize(cost)
 44 |             return cost
45 | 


--------------------------------------------------------------------------------
/docs/basic_structure/model.rst:
--------------------------------------------------------------------------------
 1 | Model (*Forward Part*)
 2 | =======================
 3 | A Model is owned by an Algorithm. Model is responsible for the entire network model (**forward part**) for the specific problems.
 4 | 
 5 | 
 6 | Methods
 7 | ----------
 8 | 1. policy(self, obs)
 9 | 
10 |     Define the structure of networks here. Algorithm will call this method to predict probabilities of actions. 
11 |     It is optional. 
12 | 
13 | 2. value(self, obs)
14 | 
15 |     Return: values: a dict of estimated values for the current observations and states. 
16 |     For example, "q_value" and "v_value".
17 | 
18 | 3. sync_params_to(self, target_net, gpu_id, decay=0.0, share_vars_parallel_executor=None)
19 | 
 20 |     This method deep-copies the parameters from the current network to the target network; the two networks must have the same structure.
21 | 
22 | An example
23 | ------------
24 | .. code-block:: python
25 |     :linenos:
26 | 
27 |     class MLPModel(Model):
28 |         def __init__(self):
29 |             self.fc = layers.fc(size=64)
30 | 
31 |         def policy(self, obs):
32 |             out = self.fc(obs)
33 |             return out
34 |             
35 |     model = MLPModel() 
36 |     target_model = deepcopy(model) # automatically create new unique parameters names for target_model.fc
37 | 
38 |     # build program
39 |     x = layers.data(name='x', shape=[100], dtype="float32")
40 |     y1 = model.policy(x) 
41 |     y2 = target_model.policy(x)  
42 | 
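 43 |     # a sketch: copy the parameters of `model` into `target_model`;
 44 |     # decay=0.0 performs a full copy, while a nonzero decay blends the old
 45 |     # target parameters with the new ones (soft update)
 46 |     model.sync_params_to(target_model, gpu_id=0, decay=0.0)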


--------------------------------------------------------------------------------
/docs/basic_structure/overview.rst:
--------------------------------------------------------------------------------
 1 | Overview
 2 | ==========
 3 | Three Components
 4 | ------------------
 5 | PARL is made up of three components: **Model, Algorithm, Agent**. They are constructed layer-by-layer to build the main body.
 6 | 
 7 | Model
 8 | ---------
 9 | A Model is owned by an Algorithm. Model is responsible for the entire network model (**forward part**) for the specific problems.
10 | 
11 | Algorithm
12 | ----------
 13 | Algorithm defines the way to update the parameters in the Model (**backward part**). We have already implemented some commonly
 14 | used algorithms__, such as DQN/DDPG/PPO/A3C; you can import and use them directly.
15 | 
16 | .. __: https://github.com/PaddlePaddle/PARL/tree/develop/parl/algorithms
17 | 
18 | Agent
19 | --------
 20 | Agent interacts with the environment and **generates data flow** outside the Algorithm.
21 | 


--------------------------------------------------------------------------------
/docs/features.rst:
--------------------------------------------------------------------------------
 1 | Features
 2 | ===========
 3 | 
 4 | **1. Reproducible**
 5 | 
 6 | | We provide algorithms that stably reproduce the results of many influential reinforcement learning algorithms.
 7 | 
 8 | **2. Large Scale**
 9 | 
 10 | | Supports high-performance parallel training with thousands of CPUs and multiple GPUs.
11 | 
12 | **3. Reusable**
13 | 
 14 | | Algorithms provided in the repository can be adapted to new tasks directly by defining a forward network; the training mechanism will be built automatically.
15 | 
16 | **4. Extensible**
17 | 
18 | | Build new algorithms quickly by inheriting the abstract class in the framework.
19 | 


--------------------------------------------------------------------------------
/docs/images/PARL-logo-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/images/PARL-logo-1.png


--------------------------------------------------------------------------------
/docs/images/PARL-logo-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/images/PARL-logo-2.png


--------------------------------------------------------------------------------
/docs/images/bar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/images/bar.png


--------------------------------------------------------------------------------
/docs/images/quickstart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/images/quickstart.png


--------------------------------------------------------------------------------
/docs/implementations/a2c.rst:
--------------------------------------------------------------------------------
1 | A2C
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.paddle.a2c
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/implementations/a3c.rst:
--------------------------------------------------------------------------------
1 | A3C
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.paddle.a3c
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/implementations/ddpg.rst:
--------------------------------------------------------------------------------
1 | DDPG
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.paddle.ddpg
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/implementations/ddqn.rst:
--------------------------------------------------------------------------------
1 | DDQN
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.paddle.ddqn
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:


--------------------------------------------------------------------------------
/docs/implementations/dqn.rst:
--------------------------------------------------------------------------------
1 | DQN
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.paddle.dqn
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/implementations/impala.rst:
--------------------------------------------------------------------------------
1 | IMPALA
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.fluid.impala.impala
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/implementations/maddpg.rst:
--------------------------------------------------------------------------------
1 | MADDPG
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.paddle.maddpg
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/implementations/oac.rst:
--------------------------------------------------------------------------------
1 | OAC
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.paddle.oac
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/implementations/pg.rst:
--------------------------------------------------------------------------------
1 | Policy Gradient
2 | ==================
3 | 
4 | .. automodule:: parl.algorithms.paddle.policy_gradient
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/implementations/ppo.rst:
--------------------------------------------------------------------------------
1 | PPO
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.paddle.ppo
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/implementations/qmix.rst:
--------------------------------------------------------------------------------
1 | QMIX
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.paddle.qmix
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/implementations/sac.rst:
--------------------------------------------------------------------------------
1 | SAC
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.paddle.sac
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/implementations/td3.rst:
--------------------------------------------------------------------------------
1 | TD3
2 | ==========
3 | 
4 | .. automodule:: parl.algorithms.paddle.td3
5 |     :members:
6 |     :no-undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
 1 | Installation
 2 | =============
 3 | Dependencies
 4 | -------------------
 5 | - Python 3.5+ (Python 3.8+ is preferable for distributed training).
 6 | - `paddlepaddle>=2.0 <https://github.com/PaddlePaddle/Paddle>`_ (**Optional**: not required if you only want to use the parallelization APIs)
 7 | 
 8 | Install
 9 | -------------
10 | PARL is distributed on PyPI and can be installed with pip:
11 | 
12 | .. code-block::
13 | 
14 |     pip install parl
15 | 
16 | or install from source:
17 | 
18 | .. code-block::
19 | 
20 |     pip install --upgrade git+https://github.com/PaddlePaddle/PARL.git
21 | 


--------------------------------------------------------------------------------
/docs/installation_guide.md:
--------------------------------------------------------------------------------
 1 | # Installation Guide
 2 | 
 3 | ## Detailed Installation Steps
 4 | 
 5 | 1. **Environment Preparation**
 6 |     - Supported Python versions: 3.7 - 3.10 (tested on Linux systems).
 7 | 
 8 | 2. **Install the DL Framework**
 9 |    - For the CPU version, simply run the following command:
10 |      ```bash
11 |      pip install paddlepaddle
12 |      ```
13 |    - For the GPU version:
14 |      - On Linux, the maximum supported version is 2.5:
15 |        ```bash
16 |        pip install paddlepaddle-gpu==2.5
17 |        ```
18 |      - On Windows, the maximum supported version is 2.2.1:
19 |        ```bash
20 |        pip install paddlepaddle-gpu==2.2.1
21 |        ```
22 | 
23 | 3. **Adjust Numpy Version**
24 |    - After installing PaddlePaddle, if the current numpy version is higher than 1.23.5, reinstall numpy:
25 |      ```bash
26 |      pip install numpy==1.23.5
27 |      ```
28 | 
29 | 4. **Install PARL and Gym**
30 |    - Run the following command to install the latest versions of PARL and Gym:
31 |      ```bash
32 |      pip install parl gym
33 |      ```
34 | 
35 | 5. **Test the Installation**
36 |    - Use the following command to run the quick-start test script:
37 |      ```bash
38 |      python examples/QuickStart/train.py
39 |      ```
40 | 
41 | ---
42 | 


--------------------------------------------------------------------------------
/docs/installation_guide_cn.md:
--------------------------------------------------------------------------------
 1 | # Installation Guide
 2 | 
 3 | ## Detailed Installation Steps
 4 | 
 5 | 1. **Environment Preparation**
 6 |     - Supported Python versions: 3.7 - 3.10 (tested on Linux systems).
 7 | 
 8 | 2. **Install the DL Framework**
 9 |    - For the CPU version, simply run the following command:
10 |      ```bash
11 |      pip install paddlepaddle
12 |      ```
 13 |    - For the GPU version:
 14 |      - On Linux, the maximum supported version is 2.5:
15 |        ```bash
16 |        pip install paddlepaddle-gpu==2.5
17 |        ```
 18 |      - On Windows, the maximum supported version is 2.2.1:
19 |        ```bash
20 |        pip install paddlepaddle-gpu==2.2.1
21 |        ```
22 | 
 23 | 3. **Adjust Numpy Version**
 24 |    - After installing PaddlePaddle, if the current numpy version is higher than 1.23.5, reinstall numpy:
25 |      ```bash
26 |      pip install numpy==1.23.5
27 |      ```
28 | 
 29 | 4. **Install PARL and Gym**
 30 |    - Run the following command to install the latest versions of PARL and Gym:
31 |      ```bash
32 |      pip install parl gym
33 |      ```
34 | 
 35 | 5. **Test the Installation**
 36 |    - Use the following command to run the quick-start test script:
37 |      ```bash
38 |      python examples/QuickStart/train.py
39 |      ```
40 | 
41 | ---


--------------------------------------------------------------------------------
/docs/locale/zh_CN/LC_MESSAGES/api_docs.po:
--------------------------------------------------------------------------------
 1 | # SOME DESCRIPTIVE TITLE.
 2 | # Copyright (C) 2019, nlp-ol@baidu.com
 3 | # This file is distributed under the same license as the PARL package.
 4 | # FIRST AUTHOR <EMAIL@ADDRESS>, 2021.
 5 | #
 6 | #, fuzzy
 7 | msgid ""
 8 | msgstr ""
 9 | "Project-Id-Version: PARL \n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2021-05-18 14:23+0800\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
14 | "Language-Team: LANGUAGE <LL@li.org>\n"
15 | "MIME-Version: 1.0\n"
16 | "Content-Type: text/plain; charset=utf-8\n"
17 | "Content-Transfer-Encoding: 8bit\n"
18 | "Generated-By: Babel 2.9.1\n"
19 | 
20 | #: ../../api_docs/index.rst:7
21 | msgid "PARL Documentation"
22 | msgstr "PARL 文档"
23 | 
24 | #: ../../api_docs/utils.rst:2
25 | msgid "parl.Model"
26 | msgstr ""
27 | 
28 | 


--------------------------------------------------------------------------------
/docs/overview/abstractions.rst:
--------------------------------------------------------------------------------
 1 | Abstractions
 2 | ----------------
 3 | .. image:: ../../.github/abstractions.png
 4 |   :align: center
 5 |   :width: 400px
 6 | 
 7 | | PARL aims to build an **agent** for training algorithms to perform complex tasks.
 8 | | The main abstractions introduced by PARL that are used to build an agent recursively are the following:
 9 | 
10 | * ``Model`` is abstracted to construct the forward network which defines a policy network or critic network given state as input.
11 | 
12 | * ``Algorithm`` describes the mechanism to update parameters in the *model* and often contains at least one model.
13 | 
14 | * ``Agent``, a data bridge between the *environment* and the *algorithm*, is responsible for data I/O with the outside environment and describes data preprocessing before feeding data into the training process.
15 | 
16 | Note: For more information about base classes, please visit our :doc:`tutorial <../tutorial/getting_started>` and :doc:`API document <../apis/model>`.
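 17 | 
 18 | For intuition, a sketch of how the three abstractions compose (the class names and arguments are illustrative):
 19 | 
 20 | .. code-block:: python
 21 | 
 22 |     model = CartpoleModel()              # forward network
 23 |     algorithm = DQN(model, gamma=0.99)   # update rule wrapping the model
 24 |     agent = CartpoleAgent(algorithm)     # data I/O with the environment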


--------------------------------------------------------------------------------
/docs/overview/features.rst:
--------------------------------------------------------------------------------
 1 | Features
 2 | ----------------
 3 | 
 4 | **1. Reproducible**
 5 | 
 6 | | We provide algorithms that stably reproduce the results of many influential reinforcement learning algorithms.
 7 | 
 8 | **2. Large Scale**
 9 | 
 10 | | Supports high-performance parallel training with thousands of CPUs and multiple GPUs.
11 | 
12 | **3. Reusable**
13 | 
 14 | | Algorithms provided in the repository can be adapted to new tasks directly by defining a forward network; the training mechanism will be built automatically.
15 | 
16 | **4. Extensible**
17 | 
18 | | Build new algorithms quickly by inheriting the abstract class in the framework.
19 | 


--------------------------------------------------------------------------------
/docs/parallel_training/comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/parallel_training/comparison.png


--------------------------------------------------------------------------------
/docs/parallel_training/elapsed_time.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/parallel_training/elapsed_time.jpg


--------------------------------------------------------------------------------
/docs/parallel_training/file_distribution.rst:
--------------------------------------------------------------------------------
 1 | File Distribution
 2 | ==================
 3 | 
 4 | File distribution is an important function of distributed parallel computing. It is responsible for distributing the user's code
 5 | and configuration files to different machines, so that all machines perform parallel computing using the same code. By default, all ``.py`` files located in the same directory
 6 | as the main file (such as ``main.py``) will be distributed. But sometimes users need to distribute specific files, such as model files, configuration files, and Python code in subdirectories (submodules to import).
 7 | To meet this demand, ``parl.connect`` provides an interface where users can directly specify the files or code that need to be distributed.
 8 | 
 9 | Example:
10 | ################
11 | 
 12 | The file directory structure is as follows; we want to distribute the ``.py`` files in the ``policy`` folder. We can pass the files that we want to distribute to the ``distributed_files`` parameter of ``connect``; this parameter also supports regular expressions.
13 | 
14 | .. code-block::
15 | 
16 |     .
17 |     ├── main.py
18 |     └── policy
19 |         ├── agent.py
20 |         ├── config.ini
21 |         └── __init__.py
22 | 
23 | .. code-block:: python
24 | 
25 |     parl.connect("localhost:8004", distributed_files=['./policy/*.py', './policy/*.ini'])
26 | 


--------------------------------------------------------------------------------
/docs/parallel_training/poster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/parallel_training/poster.png


--------------------------------------------------------------------------------
/docs/questions/distributed_training.rst:
--------------------------------------------------------------------------------
 1 | xparl questions 
 2 | ================================
 3 | Recommended data types in xparl
 4 | ###############################
 5 | Python is an interpreted language; unlike C++, it does not compile code before running. Thus xparl lacks prior knowledge of the data to be transmitted to a remote class instance, which makes it hard to convert arbitrary user-defined data into a binary data stream. Therefore, it is advisable for users to stick to native Python data types such as int and dict, as well as Numpy arrays, when passing arguments to or retrieving results from a remote class instance.
 6 | 
 7 | RL questions
 8 | ================================
 9 | 
10 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | paddlepaddle==2.4.2
2 | 


--------------------------------------------------------------------------------
/docs/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | cd "$(dirname "$0")"
3 | source ~/.bashrc
4 | export PATH="/root/miniconda3/bin:$PATH"
5 | source deactivate
6 | source activate docs
7 | pip install /work/
8 | make html
9 | 


--------------------------------------------------------------------------------
/docs/tutorial/add_histogram.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/tutorial/add_histogram.jpg


--------------------------------------------------------------------------------
/docs/tutorial/add_scalar.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/tutorial/add_scalar.jpg


--------------------------------------------------------------------------------
/docs/tutorial/output_as_csv.rst:
--------------------------------------------------------------------------------
 1 | CSV Logger
 2 | ==========
 3 | 
 4 | PARL provides a tool to output the indicators during the training process to a CSV table. The tool can be imported using:
 5 | 
 6 | .. code-block:: python
 7 | 
 8 |     from parl.utils import CSVLogger
 9 | 
10 | How to Use
11 | -------------
12 | 
13 | 1. Input path for saving the CSV file and initialize ``CSVLogger``:
14 | 
15 | .. code-block:: python
16 | 
17 |     csv_logger = CSVLogger("result.csv")
18 | 
19 | 2. Output a dictionary that contains the indicators:
20 | 
21 | `Parameters`:
22 | 
 23 | * result(dict) - indicators to be written to the CSV file
24 | 
25 | `Method`:
26 | 
27 | .. code-block:: python
28 | 
29 |     csv_logger.log_dict({"loss": 1, "reward": 2})
30 | 
31 | Example
32 | -------------
33 | 
34 | .. code-block:: python
35 | 
36 |     from parl.utils import CSVLogger
37 | 
38 |     csv_logger = CSVLogger("result.csv")
39 |     csv_logger.log_dict({"loss": 1, "reward": 2})
40 |     csv_logger.log_dict({"loss": 3, "reward": 4})
41 | 
42 | The CSV file will contain:
43 | 
44 | .. code-block::
45 | 
46 |     loss,reward
47 |     1,2
48 |     3,4
49 | 


--------------------------------------------------------------------------------
/docs/tutorial/save_param.rst:
--------------------------------------------------------------------------------
 1 | Save and Restore Parameters
 2 | =============================
 3 | 
 4 | Goal of this tutorial:
 5 | 
 6 | - Learn how to save and restore parameters.
 7 | 
 8 | **Scene 1:**
 9 | 
10 | Sometimes we need to save the parameters into a file and reuse them later on. PARL provides operators
11 | to save parameters to a file and restore parameters from a file easily. You only need several lines to implement this.
12 | 
13 | Here is a demonstration of usage:
14 | 
15 | .. code-block:: python
16 | 
17 |     agent = AtariAgent()
18 |     # save the parameters of agent to ./model_dir
19 |     agent.save('./model_dir')             
20 |     # restore the parameters from ./model_dir to agent  
21 |     agent.restore('./model_dir')    
22 | 
23 | **Scene 2:**
24 | 
 25 | Sometimes during training, we want to sync the latest model parameters to Agents (Actors) on other servers. To do this, we first fetch the parameters into memory and then
 26 | set the parameters of the Agents (Actors) on the other servers.
27 | 
28 | .. code-block:: python
29 | 
30 |     #--------------Agent---------------
31 |     weights = agent.get_weights()
32 |     #--------------Remote Actor--------------
33 |     actor.set_weights(weights)


--------------------------------------------------------------------------------
/docs/zh_CN/tutorial/csv_logger.md:
--------------------------------------------------------------------------------
 1 | # **Tutorial: Logging Experiment Data to a CSV Table**
 2 | PARL provides a tool for writing metrics from the training process to a CSV table. Import the tool with:
 3 | 
 4 | `from parl.utils import CSVLogger`
 5 | 
 6 | 
 7 | ### How to Use
 8 | 1. Pass in the path for saving the CSV file and initialize CSVLogger
 9 | 
10 | `csv_logger = CSVLogger("result.csv")`
11 | 
 12 | 2. Output the metrics recorded as a dictionary
13 | 
 14 | Parameters
 15 | - result (dict) – dictionary of metrics to be written to the CSV file
16 | 
 17 | Method
18 | 
19 | `csv_logger.log_dict({"loss": 1, "reward": 2})`
20 | 
 21 | ### Full Example
22 | ```python
23 | from parl.utils import CSVLogger
24 | 
25 | csv_logger = CSVLogger("result.csv")
26 | csv_logger.log_dict({"loss": 1, "reward": 2})
27 | csv_logger.log_dict({"loss": 3, "reward": 4})
28 | 
29 | ```
 30 | #### Expected Result
31 | 
 32 | The contents of the result.csv file are as follows:
33 | 
34 | ```
35 | loss,reward
36 | 1,2
37 | 3,4
38 | ```
39 | 


--------------------------------------------------------------------------------
/docs/zh_CN/tutorial/param.md:
--------------------------------------------------------------------------------
 1 | # **Tutorial: Managing Model Parameters**
 2 | Scenario 1: During training, we sometimes need to save the trained model parameters to disk for later deployment or evaluation.
 3 | 
 4 | Once the agent is built, the user can save the parameters directly through the agent's interfaces.
 5 | ```python
 6 | agent = AtariAgent()
 7 | # save parameters to ./model_dir
 8 | agent.save('./model_dir')
 9 | # restore parameters to this agent
10 | agent.restore('./model_dir')
11 | ```
12 | 
 13 | Scenario 2: During parallel training, we often need to sync the latest model parameters to another server. To do this, we fetch the model parameters into memory and then assign them to the agent (actor) on the other machine.
14 | 
15 | ```python
16 | #--------------Agent---------------
17 | weights = agent.get_weights()
18 | #--------------Remote Actor--------------
19 | actor.set_weights(weights)
20 | ```
21 | 
 22 | Scenario 3: After training, we need to save the trained model structure and parameters to disk for later inference deployment.
23 | 
 24 | The network structure and parameters can be saved directly through the agent's interfaces.
25 | 
26 | ```python
 27 | # save the network structure and parameters to ./inference_model_dir
28 | agent.save_inference_model('./inference_model_dir', [[None, 128]], ['float32'])
29 | ```
30 | 
 31 | For Actor-Critic algorithms, only the actor network needs to be saved.
32 | 
33 | ```python
 34 | # save the policy network structure and parameters of the Actor-Critic algorithm to ./inference_ac_model_dir
35 | agent.save_inference_model('./inference_ac_model_dir', [[None, 128]], ['float32'], agent.alg.model.actor_model)
36 | ```
37 | 


--------------------------------------------------------------------------------
/docs/zh_CN/tutorial/summary.md:
--------------------------------------------------------------------------------
 1 | # **Tutorial: Plotting Training Curves**
 2 | PARL integrates tensorboardX as its visualization tool. Import the tool with:
 3 | 
 4 | `from parl.utils import summary`
 5 | 
 6 | 
 7 | ### Line-chart API: add_scalar
 8 | summary.add_scalar(tag, scalar_value, global_step=None)
 9 | 
 10 | Common parameters
11 | - tag (string) – Data identifier
12 | - scalar_value (float or string/blobname) – Value to save
13 | - global_step (int) – Global step value to record
14 | 
 15 | Example:
16 | ```python
17 | from parl.utils import summary
18 | 
19 | x = range(100)
20 | for i in x:
21 |     summary.add_scalar('y=2x', i * 2, i)
22 | ```
 23 | Expected result:
24 | 
25 | <img src="../../tutorial/add_scalar.jpg" width="300"/>
26 | 
 27 | ### Histogram API: add_histogram
28 | 
29 | summary.add_histogram(tag, values, global_step=None)
30 | 
 31 | Common parameters
32 | - tag (string) – Data identifier
33 | - values (np.ndarray) – histogram of which to be saved
34 | - global_step (int) – Global step value to record
35 | 
 36 | Example:
37 | ```python
38 | from parl.utils import summary
39 | import numpy as np
40 | 
41 | for i in range(10):
42 |     x = np.random.random(1000)
43 |     summary.add_histogram('distribution centers', x + i, i)
44 | ```
45 | 
 46 | Expected result:
47 | 
48 | <img src="../../tutorial/add_histogram.jpg" width="300"/>
49 | 
 50 | ### Changing the data save path
 51 | The default save path is `./train_log`. The summary data path is bound to the logger, so simply change the logger's save path:
52 | ```python
53 | from parl.utils import logger
54 | logger.set_dir('./train_log/exp1')
55 | ```
56 | 


--------------------------------------------------------------------------------
/docs/zh_CN/xparl/.images/dataset1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/zh_CN/xparl/.images/dataset1.png


--------------------------------------------------------------------------------
/docs/zh_CN/xparl/.images/dataset2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/zh_CN/xparl/.images/dataset2.png


--------------------------------------------------------------------------------
/docs/zh_CN/xparl/.images/dataset3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/zh_CN/xparl/.images/dataset3.png


--------------------------------------------------------------------------------
/docs/zh_CN/xparl/.images/dataset4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/zh_CN/xparl/.images/dataset4.png


--------------------------------------------------------------------------------
/docs/zh_CN/xparl/.images/log_server.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/zh_CN/xparl/.images/log_server.png


--------------------------------------------------------------------------------
/docs/zh_CN/xparl/debug.md:
--------------------------------------------------------------------------------
 1 | # **How to Debug in xparl**
 2 | 
 3 | A class wrapped by the parallel decorator does not run locally; it runs on the cluster. Consequently, we cannot see its printed logs on the local machine. Take the earlier code as an example:
 4 | ```python
 5 | import parl
 6 | 
 7 | @parl.remote_class
 8 | class Actor(object):
 9 |   def hello_world(self):
10 |       print("Hello world.")
11 | 
12 |   def add(self, a, b):
13 |       return a + b
14 | 
 15 | # connect to the cluster (master node)
16 | parl.connect("localhost:6006")
17 | 
18 | actor = Actor()
 19 | actor.hello_world()  # the computation runs on the cluster, so nothing is printed here
20 | ```
21 | 
 22 | In this situation, how do we debug and locate the problem?
 23 | Two approaches are recommended:
24 | 
 25 | - Comment out the parallel decorator
 26 | Run the code locally first instead of on the cluster, debug it using the printed logs, and add the decorator back once it works. Note that with static-graph frameworks this may cause the static graph to be defined twice, so this approach is not recommended when using paddle or tensorflow.
27 | 
 28 | - Use xparl's log server
 29 | After the local script connects to the xparl cluster, xparl prints the address of the log server. Open this address in a browser to view the live output of each parallel task.
30 | 
31 | <img src="./.images/log_server.png" width="500"/>
32 | 


--------------------------------------------------------------------------------
/docs/zh_CN/xparl/distribute_files.md:
--------------------------------------------------------------------------------
 1 | # **How to Distribute Local Files in xparl**
 2 | 
 3 | File distribution is an important feature of distributed parallel computing. It distributes the user's code and configuration files to different machines so that all machines run the same code in parallel. By default, XPARL distributes all files ending in .py in the directory containing the main file. But sometimes users need to distribute specific files, such as model files, configuration files, or Python code in subdirectories (submodules for import). To meet this need, parl.connect provides an interface where users can directly specify the files or code to distribute.
 4 | 
 5 | ### Example
 6 | 
 7 | The directory structure is as follows, and we want to distribute the .py files in the policy folder.
 8 | We can pass the files we want to distribute to the `distributed_files` parameter of connect; this parameter supports regular expressions.
 9 | 
10 | ```
11 | .
12 | ├── main.py
13 | └── policy
14 |     ├── agent.py
15 |     ├── config.ini
16 |     └── __init__.py
17 | ```
18 | 
19 | ```python
20 | parl.connect("localhost:8004", distributed_files=['./policy/*.py', './policy/*.ini'])
21 | ```
22 | 


--------------------------------------------------------------------------------
/docs/zh_CN/xparl/introduction.md:
--------------------------------------------------------------------------------
 1 | # **Introduction to PARL's Parallel Core: XPARL**
 2 | 
 3 | As the problems that reinforcement learning can solve become more complex, algorithms demand ever more training data. To speed up training, PARL draws on the design philosophy of [Pyro4](https://github.com/irmen/Pyro4) and provides a more efficient and practical set of parallel APIs.
 4 | 
 5 | ### Easy to Use
 6 | With a single decorator, `@parl.remote_class`, users can parallelize computation easily, without worrying about tedious multiprocess or network communication and without being limited by Python's GIL.
 7 | 
 8 | ### High Performance
 9 | `@parl.remote_class` enables truly concurrent multi-threaded computation (comparable to C++ threads). As figure (a) below shows, Python's native multithreading scales poorly because of the GIL, while PARL's parallelism reduces running time linearly and thus improves concurrency efficiency.
10 | 
 11 | ### Web-based Cluster Monitoring
 12 | When launching a multi-machine cluster, PARL starts a web service through which users can check the memory and CPU usage of each machine, as well as how many cluster resources each job occupies.
13 | 
 14 | ### Compatible with All Frameworks
 15 | PARL's parallelism is compatible with any deep learning framework on the market, such as tensorflow, pytorch, or mxnet. By adding the decorator `@parl.remote_class`, users can turn their existing code into parallel code.
16 | 
17 | 
 18 | # Why PARL
19 | 
 20 | ## High Throughput and High Concurrency
 21 | PARL implements its underlying parallel computation with end-to-end network transmission, meaning there is no extra network overhead while running concurrent tasks. Unlike RLlib, which relays data through Redis, PARL achieves higher data throughput in the same amount of time. In our earlier comparison, running the same IMPALA algorithm on the same machines, PARL's parallel performance was superior.
22 | 
23 | 
 24 | ## Automatic Distribution of Local Files
 25 | Most parallel frameworks on the market require users to sync files manually before parallel code can run, e.g. configuration files have to be distributed to different machines by hand or via commands; parl automatically distributes the code files in the current directory, enabling seamless multi-machine parallelism.
26 | 
27 | <img src="../../parallel_training/comparison.png" width="1000"/>
28 | 
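 29 | As a concrete taste, parallelizing a class is a one-line change (a minimal sketch; the address is illustrative):
 30 | 
 31 | ```python
 32 | import parl
 33 | 
 34 | @parl.remote_class
 35 | class Actor(object):
 36 |     def add(self, a, b):
 37 |         return a + b
 38 | 
 39 | parl.connect("localhost:6006")  # connect to the cluster (master node)
 40 | actor = Actor()                 # this instance now runs on the cluster
 41 | print(actor.add(1, 2))          # 3, computed remotely
 42 | ```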


--------------------------------------------------------------------------------
/docs/zh_CN/xparl/serialize.md:
--------------------------------------------------------------------------------
 1 | # **Serialization/Deserialization Speedup (Optional)**
 2 | 
 3 | By default, PARL uses the cloudpickle library to serialize and deserialize data (data travels through xparl as a serialized byte stream); if the pyarrow library is installed in the Python environment, it is used for serialization and deserialization instead (because pyarrow's compatibility is not great, PARL does not install it by default).
 4 | 
 5 | pyarrow and cloudpickle perform differently in different data scenarios, so users can decide whether to install pyarrow based on their own use case. In general, the serialization protocol built into python3.8+ already meets the needs of most scenarios.
 6 | 
 7 | ### Performance Comparison
 8 | For reference, the average serialization and deserialization times of pyarrow and cloudpickle on different datasets:
 9 | 
 10 | - Dataset 1: `data = [np.random.RandomState(0).randn(50, 50)] * 10`
 11 | - Dataset 2: `data = [np.random.RandomState(0).randn(500, 500)] * 10`
 12 | - Dataset 3: `data = [np.random.RandomState(0).randn(5000, 5000)] * 10`
 13 | - Dataset 4: `data = np.random.RandomState(0).randn(5000, 50000)`
14 | 
 15 | > pyarrow versions: pyarrow==0.16.0 on python2, pyarrow==2.0.0 on python3
16 | 
17 | <img src="./.images/dataset1.png" width="400"/> <img src="./.images/dataset2.png" width="400"/>
18 | <img src="./.images/dataset3.png" width="400"/> <img src="./.images/dataset4.png" width="400"/>
19 | 
 20 | ### Conclusions
 21 | - pyarrow clearly outperforms cloudpickle when serializing/deserializing **very large Numpy matrices**
 22 | - Using Python 3.8+ also improves serialization performance (python3.8+ upgraded pickle to support protocol=5)
23 | 
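 24 | To get a feel for the protocol=5 effect mentioned above, a minimal timing sketch using the standard library's pickle (this is not the script used to produce the figures):
 25 | 
 26 | ```python
 27 | import pickle
 28 | import time
 29 | 
 30 | import numpy as np
 31 | 
 32 | data = [np.random.RandomState(0).randn(500, 500)] * 10  # dataset 2 above
 33 | 
 34 | start = time.time()
 35 | payload = pickle.dumps(data, protocol=5)  # protocol=5 requires python3.8+
 36 | pickle.loads(payload)
 37 | print('round trip: %.4fs, %d bytes' % (time.time() - start, len(payload)))
 38 | ```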


--------------------------------------------------------------------------------
/evo_kit/DeepES.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/evo_kit/DeepES.gif


--------------------------------------------------------------------------------
/evo_kit/README.md:
--------------------------------------------------------------------------------
 1 | # EvoKit
 2 | EvoKit is an evolutionary-algorithm library that bundles multiple evolutionary algorithms and is compatible with a wide range of inference frameworks, with a focus on fast online deployment and validation.
 3 | <p align="center">
 4 | <img src="DeepES.gif" alt="PARL" width="500"/>
 5 | </p>
 6 | 
 7 | ## Usage Example
 8 | ```c++
 9 | // Instantiate an agent; the config file specifies the model, the sampling method (Gaussian/CMA sampling, ...) and the update rule (SGD/Adam, ...).
10 | auto agent = std::make_shared<ESAgent>(config);
11 | 
12 | std::vector<SamplingInfo> noisy_info;
13 | std::vector<int> noisy_rewards;
14 | for (int i = 0; i < 10; ++i) {
15 |    auto sampling_agent = agent->clone(); // clone a sampling agent
16 |    SamplingInfo info;
17 |    sampling_agent->add_noise(info); // perturb the parameters and record the random seed in info
18 |    int reward = evaluate(env, sampling_agent); // evaluate the perturbed parameters
19 |    noisy_info.push_back(info); // record the seed of this noise sample
20 |    noisy_rewards.push_back(reward); // record the evaluation result
21 | }
22 | // Update the parameters from the rewards and seeds; repeat the procedure until convergence.
23 | agent->update(noisy_info, noisy_rewards);
24 | ```
25 | 
26 | ## One-click Demo
27 | - sh ./scripts/build.sh
28 | 
29 | ## Dependencies:
30 | - Protobuf2
31 | - OpenMP
32 | - [glog](https://github.com/google/glog)
33 | - [gflags](https://github.com/gflags/gflags/blob/master/INSTALL.md)
34 | 
35 | ## Optional Dependencies:
36 | 
37 | ### Using PaddleLite
38 | Download PaddleLite's prebuilt X86 library, or build PaddleLite from source, to obtain the inference_lite_lib folder, and place it in the current directory. (See: [Deploying PaddleLite on X86](https://paddle-lite.readthedocs.io/zh/latest/demo_guides/x86.html))
39 | 
40 | ### Using torch
41 | Download [libtorch](https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip) or build torch from source to obtain the libtorch folder, and place it in the current directory.
40 | 


--------------------------------------------------------------------------------
/evo_kit/core/include/evo_kit/optimizer_factory.h:
--------------------------------------------------------------------------------
 1 | //   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef EVO_KIT_OPTIMIZER_FACTORY_H
16 | #define EVO_KIT_OPTIMIZER_FACTORY_H
17 | 
18 | #include <algorithm>
19 | #include <glog/logging.h>
20 | #include <memory>
21 | #include "evo_kit/adam_optimizer.h"
22 | #include "evo_kit/evo_kit.pb.h"
23 | #include "evo_kit/optimizer.h"
24 | #include "evo_kit/sgd_optimizer.h"
25 | 
26 | namespace evo_kit {
27 | /* @brief: create an optimizer according to the configuration
28 |  * @args:
29 |  *    optimizer_config: configuration for the optimizer
30 |  *
31 |  */
32 | std::shared_ptr<Optimizer> create_optimizer(const OptimizerConfig& optimizer_config);
33 | 
34 | } // namespace
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/evo_kit/core/include/evo_kit/sampling_factory.h:
--------------------------------------------------------------------------------
 1 | //   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef EVO_KIT_SAMPLING_FACTORY_H
16 | #define EVO_KIT_SAMPLING_FACTORY_H
17 | 
18 | #include <algorithm>
19 | #include <glog/logging.h>
20 | #include <memory>
21 | #include "evo_kit/cached_gaussian_sampling.h"
22 | #include "evo_kit/evo_kit.pb.h"
23 | #include "evo_kit/gaussian_sampling.h"
24 | #include "evo_kit/sampling_method.h"
25 | 
26 | namespace evo_kit {
27 | /* @brief: create a sampling_method according to the configuration
28 |  * @args:
29 |  *    config: configuration for EvoKit
30 |  *
31 |  */
32 | std::shared_ptr<SamplingMethod> create_sampling_method(const EvoKitConfig& config);
33 | 
34 | } // namespace
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/evo_kit/core/src/sampling_factory.cc:
--------------------------------------------------------------------------------
 1 | //   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "evo_kit/sampling_factory.h"
16 | 
17 | namespace evo_kit {
18 | 
19 | 
20 | std::shared_ptr<SamplingMethod> create_sampling_method(const EvoKitConfig& config) {
21 |     std::shared_ptr<SamplingMethod> sampling_method;
22 |     bool cached = config.gaussian_sampling().cached();
23 | 
24 |     if (cached) {
25 |         sampling_method = std::make_shared<CachedGaussianSampling>();
26 |     } else {
27 |         sampling_method = std::make_shared<GaussianSampling>();
28 |     }
29 | 
30 |     bool success = sampling_method->load_config(config);
31 | 
32 |     if (success) {
33 |         return sampling_method;
34 |     } else {
35 |         LOG(ERROR) << "[EvoKit] Fail to create sampling_method";
36 |         return nullptr;
37 |     }
38 | 
39 | }
40 | 
41 | }//namespace
42 | 


--------------------------------------------------------------------------------
/evo_kit/demo/cartpole_config.prototxt:
--------------------------------------------------------------------------------
 1 | seed: 1024
 2 | gaussian_sampling {
 3 |   std: 0.5
 4 |   cached: true
 5 |   cache_size: 100000
 6 | }
 7 | optimizer {
 8 |   type: "Adam"
 9 |   base_lr: 0.05
10 |   momentum: 0.9
11 |   beta1: 0.9
12 |   beta2: 0.999
13 |   epsilon: 1e-08
14 | }
15 | async_es {
16 |   model_iter_id: 0
17 | }
18 | 


--------------------------------------------------------------------------------
/evo_kit/demo/paddle/cartpole_init_model.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/evo_kit/demo/paddle/cartpole_init_model.zip


--------------------------------------------------------------------------------
/evo_kit/demo/torch/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required (VERSION 2.6)
 2 | project (EvoKit_demo)
 3 | set(TARGET parallel_main)
 4 | 
 5 | set(CMAKE_CXX_STANDARD 11)
 6 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 7 | set(CMAKE_CXX_EXTENSIONS OFF)
 8 | 
 9 | find_package(OpenMP)
10 | if (OPENMP_FOUND)
11 |     set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
12 |     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
13 |     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
14 | endif()
15 | 
16 | list(APPEND CMAKE_PREFIX_PATH "./libtorch")
17 | find_package(Torch REQUIRED ON)
18 | set(demo "${PROJECT_SOURCE_DIR}/cartpole_solver_parallel.cc")
19 | 
20 | 
21 | ########## main ##########
22 | add_executable(${TARGET} ${demo} ${framework_src}) 
23 | target_link_libraries(${TARGET} gflags protobuf pthread glog)
24 | 
25 | ########## Torch libraries ##########
26 | target_link_libraries(${TARGET} "${TORCH_LIBRARIES}")
27 | 
28 | 
29 | ########## EvoKit libraries ##########
30 | list(APPEND CMAKE_PREFIX_PATH "./libevokit/cmake/Torch")
31 | find_package(EvoKit)
32 | target_link_libraries(${TARGET} "${EVOKIT_LIBRARY}")
33 | 


--------------------------------------------------------------------------------
/evo_kit/scripts/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | sh scripts/lib_install.sh torch
 4 | 
 5 | cd demo/torch
 6 | 
 7 | #---------------libtorch-------------#
 8 | if [ ! -d "./libtorch" ];then
 9 |     echo "Cannot find the torch library: ./libtorch"
10 |     echo "Downloading Torch library"
11 |     wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip
12 |     unzip -q libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip
13 |     rm -rf libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip
14 |     echo "Torch library Downloaded"
15 | fi
16 | 
17 | 
18 | #---------------libevokit-------------#
19 | cp -r ../../libevokit ./
20 | if [ ! -d "./libevokit" ];then
21 |   echo "Cannot find the EvoKit library: ./libevokit"
 22 |   echo "Please put the EvoKit library in the current folder according to the instructions in README" # TODO: readme
23 |   exit 1
24 | fi
25 | 
26 | # proto
27 | cp ../cartpole_config.prototxt ./
28 | 
29 | #----------------build---------------#
30 | rm -rf build
31 | mkdir build
32 | cd build
33 | cmake ../
34 | make -j10
35 | cd -
36 | 
37 | #-----------------run----------------#
38 | ./build/parallel_main
39 | 
40 | 
41 | cd ../..
42 | 


--------------------------------------------------------------------------------
/evo_kit/scripts/lib_install.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $# != 1 ]; then
 4 |   echo "You must choose one framework (paddle/torch) to compile EvoKit."
 5 |   exit 0
 6 | fi
 7 | 
 8 | if [ $1 = "paddle" ]; then
 9 |   #---------------paddlelite-------------#
10 |   if [ ! -d "./inference_lite_lib" ];then
11 |     echo "Cannot find the PaddleLite library: ./inference_lite_lib"
 12 |     echo "Please put the PaddleLite library in the current folder according to the instructions in README"
13 |     exit 1
14 |   fi
15 |   
16 |   # Initialization model
17 |   if [ ! -d ./demo/paddle/cartpole_init_model ]; then
18 |     unzip ./demo/paddle/cartpole_init_model.zip -d ./demo/paddle/
19 |   fi
20 | 
21 |   FLAGS=" -DWITH_PADDLE=ON"
22 | elif [ $1 = "torch" ]; then
23 |   FLAGS=" -DWITH_TORCH=ON"
24 | else
25 |   echo "Invalid arguments. [paddle/torch]"
26 |   exit 0
27 | fi
28 | 
29 | 
30 | #----------------protobuf-------------#
31 | cd core/proto/
32 | protoc evo_kit/evo_kit.proto --cpp_out . 
33 | cd -
34 | 
35 | #----------------build---------------#
36 | echo ${FLAGS}
37 | rm -rf build
38 | mkdir build
39 | cd build
40 | cmake ../ ${FLAGS}
41 | make -j10
42 | make install
43 | cd -
44 | 


--------------------------------------------------------------------------------
/evo_kit/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required (VERSION 2.6)
 2 | project (EvoKit_demo)
 3 | set(TARGET unit_test_main)
 4 | 
 5 | set(CMAKE_CXX_STANDARD 11)
 6 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 7 | set(CMAKE_CXX_EXTENSIONS OFF)
 8 | 
 9 | find_package(GTest REQUIRED)
10 | find_package(OpenMP)
11 | if (OPENMP_FOUND)
12 |   set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
13 |   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
14 |   set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
15 | endif()
16 | 
17 | # Torch lib
18 | list(APPEND CMAKE_PREFIX_PATH "../libtorch")
19 | find_package(Torch REQUIRED ON)
20 | 
21 | # include and source
22 | include_directories("${PROJECT_SOURCE_DIR}/include")
23 | file(GLOB test_src "${PROJECT_SOURCE_DIR}/src/*.cc")
24 | 
25 | # make
26 | add_executable(${TARGET} "unit_test.cc" ${core_src} ${agent_src} ${test_src})
27 | target_link_libraries(${TARGET} gflags protobuf pthread glog gtest "${TORCH_LIBRARIES}")
28 | 
29 | 
30 | ########## EvoKit libraries ##########
31 | list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/libevokit/cmake/Torch")
32 | find_package(EvoKit)
33 | target_link_libraries(${TARGET} "${EVOKIT_LIBRARY}")
34 | 


--------------------------------------------------------------------------------
/evo_kit/test/prototxt/torch_sin_cached_config.prototxt:
--------------------------------------------------------------------------------
 1 | seed : 1024
 2 | 
 3 | gaussian_sampling {
 4 |   std: 0.005
 5 |   cached: true
 6 |   cache_size : 100000
 7 | }
 8 | 
 9 | optimizer {
10 |   type: "Adam",
11 |   base_lr: 0.005,
12 |   momentum: 0.9,
13 |   beta1: 0.9,
14 |   beta2: 0.999,
15 |   epsilon: 1e-8,
16 | }
17 | 
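The `optimizer` block above is a standard Adam configuration. For reference, here is a minimal numpy sketch of one Adam step under these hyperparameters (a textbook illustration, not EvoKit's actual C++ implementation):

```
import numpy as np

# Hyperparameters taken from the prototxt above.
base_lr, beta1, beta2, epsilon = 0.005, 0.9, 0.999, 1e-8

def adam_step(theta, grad, m, v, t):
    """One textbook Adam update at (1-based) iteration t."""
    m = beta1 * m + (1 - beta1) * grad        # first-moment (mean) estimate
    v = beta2 * v + (1 - beta2) * grad ** 2   # second-moment estimate
    m_hat = m / (1 - beta1 ** t)              # bias correction
    v_hat = v / (1 - beta2 ** t)
    theta = theta - base_lr * m_hat / (np.sqrt(v_hat) + epsilon)
    return theta, m, v

theta, m, v = np.zeros(3), np.zeros(3), np.zeros(3)
theta, m, v = adam_step(theta, np.array([0.1, -0.2, 0.3]), m, v, t=1)
```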


--------------------------------------------------------------------------------
/evo_kit/test/prototxt/torch_sin_config.prototxt:
--------------------------------------------------------------------------------
 1 | seed : 1024
 2 | 
 3 | gaussian_sampling {
 4 |   std: 0.005
 5 |   cached: false
 6 | }
 7 | 
 8 | optimizer {
 9 |   type: "Adam",
10 |   base_lr: 0.005,
11 |   momentum: 0.9,
12 |   beta1: 0.9,
13 |   beta2: 0.999,
14 |   epsilon: 1e-8,
15 | }
16 | 


--------------------------------------------------------------------------------
/evo_kit/test/run_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
 3 | 
 4 | #---------------libtorch-------------#
 5 | if [ ! -d "./libtorch" ];then
 6 |   echo "Cannot find the torch library: ./libtorch"
 7 |   echo "Downloading Torch library"
 8 |   wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip
 9 |   unzip -q libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip
10 |   rm -rf libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip
11 |   echo "Torch library Downloaded"
12 | fi
13 | 
14 | #----------------protobuf-------------#
15 | cd core/proto/
16 | protoc evo_kit/evo_kit.proto --cpp_out . 
17 | cd -
18 | 
19 | #----------------build---------------#
20 | sh scripts/lib_install.sh torch
21 | 
22 | #----------------build test---------------#
23 | cd test
24 | 
25 | cp -r ../libevokit ./
26 | if [ ! -d "./libevokit" ];then
27 |   echo "Cannot find the EvoKit library: ./libevokit"
28 |   echo "Please put the EvoKit library in the current folder according to the instructions in the README" # TODO: readme
29 |   exit 1
30 | fi
31 | 
32 | rm -rf build
33 | mkdir build
34 | cd build
35 | cmake ../
36 | make -j10
37 | 
38 | #-----------------run----------------#
39 | ./unit_test_main
40 | 
41 | cd ..
42 | 


--------------------------------------------------------------------------------
/evo_kit/test/src/utils_test.cc:
--------------------------------------------------------------------------------
 1 | //   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "gtest/gtest.h"
16 | #include <vector>
17 | #include "evo_kit/utils.h"
18 | 
19 | namespace evo_kit {
20 | 
21 | // Tests the compute_centered_ranks() utility.
22 | TEST(UtilsTest, Method_compute_centered_ranks) {
23 |   float a[5] = {9.0, 8.0, 7.0, 6.0, 5.0};
24 |   std::vector<float> reward_vec(a, a+5);
25 |   EXPECT_EQ(compute_centered_ranks(reward_vec), true);
26 | }
27 | 
28 | 
29 | }  // namespace evo_kit
30 | 
31 | 
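For intuition, the transform exercised by this test is presumably the standard centered-rank shaping used in evolution strategies (the C++ version appears to transform the reward vector in place and return a success flag). A minimal numpy sketch under that assumption:

```
import numpy as np

def compute_centered_ranks(x):
    """Map values to centered ranks in [-0.5, 0.5] (standard ES
    rank shaping; a sketch, not the C++ implementation)."""
    ranks = np.empty(len(x), dtype=np.float32)
    ranks[np.argsort(x)] = np.arange(len(x), dtype=np.float32)
    return ranks / (len(x) - 1) - 0.5

print(compute_centered_ranks(np.array([9.0, 8.0, 7.0, 6.0, 5.0])))
# -> [ 0.5   0.25  0.   -0.25 -0.5 ]
```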


--------------------------------------------------------------------------------
/evo_kit/test/unit_test.cc:
--------------------------------------------------------------------------------
 1 | //   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "gtest/gtest.h"
16 | 
17 | int main(int argc, char **argv) {
18 |   ::testing::InitGoogleTest(&argc, argv);
19 |   return RUN_ALL_TESTS();
20 | }
21 | 


--------------------------------------------------------------------------------
/examples/A2C/.result/result_a2c_paddle0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/A2C/.result/result_a2c_paddle0.png


--------------------------------------------------------------------------------
/examples/A2C/.result/result_a2c_paddle1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/A2C/.result/result_a2c_paddle1.png


--------------------------------------------------------------------------------
/examples/A2C/requirements.txt:
--------------------------------------------------------------------------------
1 | gym==0.12.1
2 | atari-py==0.1.7
3 | opencv-python
4 | parl>=1.4.3
5 | paddlepaddle
6 | 


--------------------------------------------------------------------------------
/examples/AlphaZero/.pic/good_moves_rate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/AlphaZero/.pic/good_moves_rate.png


--------------------------------------------------------------------------------
/examples/AlphaZero/.pic/perfect_moves_rate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/AlphaZero/.pic/perfect_moves_rate.png


--------------------------------------------------------------------------------
/examples/AlphaZero/requirements.txt:
--------------------------------------------------------------------------------
1 | parl==1.4.3
2 | tqdm
3 | paddlepaddle==2.0.0
4 | 


--------------------------------------------------------------------------------
/examples/Baselines/GridDispatch_competition/README.md:
--------------------------------------------------------------------------------
1 | ## Baselines for grid dispatching competition
2 | 
3 | Competition link: [国家电网调控AI创新大赛:电网运行组织智能安排](https://aistudio.baidu.com/aistudio/competition/detail/111)
4 | 
5 | We provide a distributed SAC baseline based on PARL, with both paddlepaddle and torch implementations:
6 | - [paddlepaddle baseline](paddle)
7 | - [torch baseline](torch)
8 | 


--------------------------------------------------------------------------------
/examples/Baselines/Halite_competition/paddle/encode_model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import base64
16 | import pickle
17 | import paddle
18 | 
19 | if __name__ == '__main__':
20 | 
21 |     model = paddle.load('./model/latest_ship_model.pth')
22 |     actor = model['actor']
23 | 
24 |     for name, param in actor.items():
25 |         actor[name] = param.numpy()
26 | 
27 |     model_byte = base64.b64encode(pickle.dumps(actor))
28 |     with open('./model/actor.txt', 'wb') as f:
29 |         f.write(model_byte)
30 | 
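For completeness, the encoding can be reversed when a submission loads its weights back; a minimal sketch, assuming the `./model/actor.txt` file produced by the script above:

```
import base64
import pickle

# Recover the dict of numpy weight arrays written by the script above.
with open('./model/actor.txt', 'rb') as f:
    actor = pickle.loads(base64.b64decode(f.read()))

for name, param in actor.items():
    print(name, param.shape)
```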


--------------------------------------------------------------------------------
/examples/Baselines/Halite_competition/paddle/model/latest_ship_model.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/Baselines/Halite_competition/paddle/model/latest_ship_model.pth


--------------------------------------------------------------------------------
/examples/Baselines/Halite_competition/paddle/requirements.txt:
--------------------------------------------------------------------------------
1 | parl>=2.0.0
2 | paddlepaddle>=2.0.0
3 | 


--------------------------------------------------------------------------------
/examples/Baselines/Halite_competition/torch/encode_model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import base64
16 | import pickle
17 | import torch
18 | 
19 | if __name__ == '__main__':
20 | 
21 |     model = torch.load('./model/latest_ship_model.pth')
22 |     actor = model["actor"]
23 | 
24 |     for name, param in actor.items():
25 |         actor[name] = param.numpy()
26 | 
27 |     model_byte = base64.b64encode(pickle.dumps(actor))
28 |     with open('./model/actor.txt', 'wb') as f:
29 |         f.write(model_byte)
30 | 


--------------------------------------------------------------------------------
/examples/Baselines/Halite_competition/torch/model/latest_ship_model.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/Baselines/Halite_competition/torch/model/latest_ship_model.pth


--------------------------------------------------------------------------------
/examples/Baselines/Halite_competition/torch/requirements.txt:
--------------------------------------------------------------------------------
1 | parl>=2.0.0
2 | torch>=1.6.0
3 | 


--------------------------------------------------------------------------------
/examples/CARLA_SAC/.benchmark/Lane_bend.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/CARLA_SAC/.benchmark/Lane_bend.gif


--------------------------------------------------------------------------------
/examples/CARLA_SAC/.benchmark/carla_sac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/CARLA_SAC/.benchmark/carla_sac.png


--------------------------------------------------------------------------------
/examples/CARLA_SAC/model.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/CARLA_SAC/model.ckpt


--------------------------------------------------------------------------------
/examples/CQL/requirements.txt:
--------------------------------------------------------------------------------
1 | paddlepaddle>=2.0.0
2 | gym==0.20.0
3 | parl>=2.0.0
4 | mujoco_py==2.0.2.8
5 | git+https://github.com/rail-berkeley/d4rl@master#egg=d4rl
6 | 


--------------------------------------------------------------------------------
/examples/DDPG/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce DDPG with PARL
 2 | Based on PARL, we have reproduced the DDPG deep reinforcement learning algorithm, reaching the same level of performance as reported in the paper on Mujoco benchmarks.
 3 | 
 4 | > Paper: DDPG in [Continuous control with deep reinforcement learning](https://arxiv.org/abs/1509.02971)
 5 | 
 6 | ### Mujoco games introduction
 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download the Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco).
 8 | 
 9 | ### Benchmark result
10 | 
11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/DDPG/paddle/result.png" width="600" alt="DDPG_results"/>
12 | + Each experiment was run three times with different seeds
13 | 
14 | ## How to use
15 | ### Dependencies:
16 | + python3.7+
17 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
18 | + [paddlepaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle)
19 | + gym>=0.26.0
20 | + mujoco>=2.2.2
21 | 
22 | ### Start Training:
23 | ```
24 | # To train an agent for the HalfCheetah-v4 game
25 | python train.py
26 | 
27 | # To train for another game
28 | python train.py --env [ENV_NAME]
29 | ```


--------------------------------------------------------------------------------
/examples/DDPG/requirements.txt:
--------------------------------------------------------------------------------
1 | parl>=2.1.1
2 | paddlepaddle>=2.0.0
3 | gym>=0.26.0
4 | mujoco>=2.2.2
5 | 


--------------------------------------------------------------------------------
/examples/DQN/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce DQN with PARL
 2 | Based on PARL, we provide a simple example of the DQN algorithm.
 3 | 
 4 | + Paper: DQN in [Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html)
 5 | 
 6 | ### Result
 7 | 
 8 | Performance of DQN playing CartPole-v0
 9 | 
10 | <p align="left">
11 | <img src="../QuickStart/performance.gif" alt="result" height="175"/>
12 | <img src="cartpole.jpg" alt="result" height="175"/>
13 | </p>
14 | 
15 | ## How to use
16 | ### Dependencies:
17 | + [paddlepaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle)
18 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
19 | + gym
20 | + pygame
21 | 
22 | 
23 | ### Start Training:
24 | ```
25 | # To train a model for the CartPole-v0 game
26 | python train.py
27 | ```
28 | 
29 | ## DQN-Variants
30 | 
31 | For DQN variants such as Double DQN and Dueling DQN, please check [here](https://github.com/PaddlePaddle/PARL/tree/develop/examples/DQN_variant)
32 | 


--------------------------------------------------------------------------------
/examples/DQN/cartpole.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/DQN/cartpole.jpg


--------------------------------------------------------------------------------
/examples/DQN/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | pygame
3 | parl>=2.1.1
4 | paddlepaddle>=2.0.0
5 | 


--------------------------------------------------------------------------------
/examples/DQN_variant/.benchmark/Dueling DQN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/DQN_variant/.benchmark/Dueling DQN.png


--------------------------------------------------------------------------------
/examples/DQN_variant/requirements.txt:
--------------------------------------------------------------------------------
1 | paddlepaddle>=2.0.0
2 | parl>=2.1.1
3 | gym==0.18.0
4 | tqdm
5 | opencv-python
6 | atari-py==0.2.6
7 | 


--------------------------------------------------------------------------------
/examples/ES/es.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import parl
16 | 
17 | __all__ = ['ES']
18 | 
19 | 
20 | class ES(parl.Algorithm):
21 |     def __init__(self, model):
22 |         """ES algorithm.
23 | 
24 |         Since the parameters of the model are updated at the numpy level, a `learn` function is not needed
25 |         in this algorithm.
26 | 
27 |         Args:
28 |             model(`parl.Model`): policy model of ES algorithm.
29 |         """
30 |         self.model = model
31 | 
32 |     def predict(self, obs):
33 |         """Use the policy model to predict actions of observations.
34 | 
35 |         Args:
36 |             obs(layers.data):  data layer of observations.
37 | 
38 |         Returns:
39 |             tensor of predicted actions.
40 |         """
41 | 
42 |         return self.model(obs)
43 | 
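Since `ES` only wraps a model for prediction, using it is straightforward. A hypothetical sketch (the real model and training loop live in mujoco_model.py and train.py):

```
import paddle
import paddle.nn as nn
import parl

class PolicyModel(parl.Model):  # hypothetical stand-in for mujoco_model.py
    def __init__(self, obs_dim, act_dim):
        super(PolicyModel, self).__init__()
        self.fc = nn.Linear(obs_dim, act_dim)

    def forward(self, obs):
        return paddle.tanh(self.fc(obs))

alg = ES(PolicyModel(obs_dim=17, act_dim=6))
obs = paddle.randn([1, 17])
action = alg.predict(obs)  # simply forwards through the wrapped model
```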


--------------------------------------------------------------------------------
/examples/ES/es_config.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | config = {
16 |     #==========  remote config ==========
17 |     'master_address': 'localhost:8837',
18 |     #==========  env config ==========
19 |     'env_name': 'HalfCheetah-v4',
20 |     #==========  actor config ==========
21 |     'actor_num': 24,
22 |     'action_noise_std': 0.01,
23 |     'min_task_runtime': 0.2,
24 |     'eval_prob': 0.003,
25 |     'filter_update_prob': 0.01,
26 | 
27 |     #==========  learner config ==========
28 |     'stepsize': 0.01,
29 |     'train_steps': 200,
30 |     'min_episodes_per_batch': 1000,
31 |     'min_steps_per_batch': 10000,
32 |     'noise_size': 200000000,
33 |     'noise_stdev': 0.02,
34 |     'l2_coeff': 0.005,
35 |     'report_window_size': 10,
36 | }
37 | 
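As an illustration of how the noise-related fields connect to the shared noise table defined in noise.py (a sketch only; the exact wiring lives in train.py and actor.py):

```
from es_config import config
from noise import SharedNoiseTable

# Note: noise_size is 2e8 floats (~800 MB); reduce it for a quick local test.
noise_table = SharedNoiseTable(config['noise_size'])
param_dim = 100  # hypothetical flattened parameter count
idx = noise_table.sample_index(param_dim)
perturbation = config['noise_stdev'] * noise_table.get(idx, param_dim)
# Only the integer `idx` needs to travel between learner and actors.
```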


--------------------------------------------------------------------------------
/examples/ES/noise.py:
--------------------------------------------------------------------------------
 1 | # Third party code
 2 | #
 3 | # The following code are copied or modified from:
 4 | # https://github.com/ray-project/ray/blob/master/python/ray/rllib/utils/filter.py
 5 | 
 6 | import numpy as np
 7 | 
 8 | 
 9 | class SharedNoiseTable(object):
10 |     """Shared noise table used by learner and actor.
11 | 
12 |     Learner and actor will create the same noise table by passing the same seed.
13 |     With the same noise table, learner and actor can communicate noise by
14 |     indices into the noise table instead of numpy arrays of noise values.
15 |     """
16 | 
17 |     def __init__(self, noise_size, seed=1024):
18 |         self.noise_size = noise_size
19 |         self.seed = seed
20 |         self.noise = self._create_noise()
21 | 
22 |     def _create_noise(self):
23 |         noise = np.random.RandomState(self.seed).randn(self.noise_size).astype(
24 |             np.float32)
25 |         return noise
26 | 
27 |     def get(self, i, dim):
28 |         return self.noise[i:i + dim]
29 | 
30 |     def sample_index(self, dim):
31 |         return np.random.randint(0, len(self.noise) - dim + 1)
32 | 
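To make the index-based protocol concrete: two tables created with the same seed hold identical noise, so only an integer index has to cross the wire. A small sketch:

```
import numpy as np

learner_table = SharedNoiseTable(noise_size=1000, seed=1024)
actor_table = SharedNoiseTable(noise_size=1000, seed=1024)

idx = actor_table.sample_index(dim=10)          # actor perturbs with this slice
assert np.allclose(actor_table.get(idx, 10),
                   learner_table.get(idx, 10))  # learner rebuilds it from idx alone
```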


--------------------------------------------------------------------------------
/examples/ES/requirements.txt:
--------------------------------------------------------------------------------
1 | paddlepaddle>=2.0.0
2 | parl>=2.1.1
3 | gym>=0.26.0
4 | mujoco>=2.2.2
5 | 


--------------------------------------------------------------------------------
/examples/IMPALA/requirements.txt:
--------------------------------------------------------------------------------
1 | paddlepaddle>=2.0.0
2 | parl>=2.1.1
3 | gym==0.12.1
4 | atari-py==0.1.7
5 | opencv-python
6 | 


--------------------------------------------------------------------------------
/examples/MADDPG/requirements.txt:
--------------------------------------------------------------------------------
1 | PettingZoo==1.17.0
2 | gym==0.23.1
3 | parl>=2.1.1
4 | paddlepaddle
5 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/final_submit/pelvisBasedObs_scaler.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/final_submit/pelvisBasedObs_scaler.npz


--------------------------------------------------------------------------------
/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/competition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/competition.png


--------------------------------------------------------------------------------
/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/curriculum-learning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/curriculum-learning.png


--------------------------------------------------------------------------------
/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/demo.gif


--------------------------------------------------------------------------------
/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/fastest.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/fastest.png


--------------------------------------------------------------------------------
/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/last course.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/last course.png


--------------------------------------------------------------------------------
/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/velocity_distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/velocity_distribution.png


--------------------------------------------------------------------------------
/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/pelvisBasedObs_scaler.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/pelvisBasedObs_scaler.npz


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/final_submit/official_obs_scaler.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2019-Learn-to-Move-Challenge/final_submit/official_obs_scaler.npz


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/image/performance.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2019-Learn-to-Move-Challenge/image/performance.gif


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/official_obs_scaler.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2019-Learn-to-Move-Challenge/official_obs_scaler.npz


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/replay_memory.py:
--------------------------------------------------------------------------------
1 | ../NeurIPS2018-AI-for-Prosthetics-Challenge/replay_memory.py


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/eval_difficulty1.sh:
--------------------------------------------------------------------------------
1 | # select which GPU to use
2 | export CUDA_VISIBLE_DEVICES=0
3 | 
4 | python evaluate.py --actor_num 160 \
5 |            --difficulty 1 \
6 |            --penalty_coeff 3.0 \
7 |            --saved_models_dir ./output/difficulty1/model_every_100_episodes \
8 |            --evaluate_times 300
9 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/eval_difficulty2.sh:
--------------------------------------------------------------------------------
 1 | # select which GPU to use
 2 | export CUDA_VISIBLE_DEVICES=0
 3 | 
 4 | 
 5 | python evaluate.py --actor_num 160 \
 6 |            --difficulty 2 \
 7 |            --penalty_coeff 5.0 \
 8 |            --saved_models_dir ./output/difficulty2/model_every_100_episodes \
 9 |            --evaluate_times 300
10 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/eval_difficulty3.sh:
--------------------------------------------------------------------------------
 1 | # select which GPU to use
 2 | export CUDA_VISIBLE_DEVICES=0
 3 | 
 4 | python evaluate.py --actor_num 160 \
 5 |            --difficulty 3 \
 6 |            --vel_penalty_coeff 3.0 \
 7 |            --penalty_coeff 2.0 \
 8 |            --saved_models_dir ./output/difficulty3/model_every_100_episodes \
 9 |            --evaluate_times 300
10 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/eval_difficulty3_first_target.sh:
--------------------------------------------------------------------------------
 1 | # select which GPU to use
 2 | export CUDA_VISIBLE_DEVICES=0
 3 | 
 4 | python evaluate.py --actor_num 160 \
 5 |            --difficulty 3 \
 6 |            --vel_penalty_coeff 3.0 \
 7 |            --penalty_coeff 3.0 \
 8 |            --only_first_target \
 9 |            --saved_models_dir ./output/difficulty3_first_target/model_every_100_episodes \
10 |            --evaluate_times 300
11 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/train_difficulty1.sh:
--------------------------------------------------------------------------------
 1 | echo `which python` 
 2 | if [ $# != 1 ]; then
 3 |     echo "Usage: sh train_difficulty1.sh [RESTORE_MODEL_PATH]" 
 4 |     exit 1
 5 | fi
 6 | 
 7 | # select which GPU to use
 8 | export CUDA_VISIBLE_DEVICES=0
 9 | 
10 | python train.py --actor_num 300 \
11 |            --difficulty 1 \
12 |            --penalty_coeff 3.0 \
13 |            --logdir ./output/difficulty1 \
14 |            --restore_model_path $1
15 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/train_difficulty2.sh:
--------------------------------------------------------------------------------
 1 | if [ $# != 1 ]; then
 2 |     echo "Usage: sh train_difficulty2.sh [RESTORE_MODEL_PATH]" 
 3 |     exit 1
 4 | fi
 5 | 
 6 | # select which GPU to use
 7 | export CUDA_VISIBLE_DEVICES=0
 8 | 
 9 | python train.py --actor_num 300 \
10 |            --difficulty 2 \
11 |            --penalty_coeff 5.0 \
12 |            --logdir ./output/difficulty2 \
13 |            --restore_model_path $1
14 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/train_difficulty3.sh:
--------------------------------------------------------------------------------
 1 | if [ $# != 1 ]; then
 2 |     echo "Usage: sh train_difficulty3.sh [RESTORE_MODEL_PATH]" 
 3 |     exit 1
 4 | fi
 5 | 
 6 | # select which GPU to use
 7 | export CUDA_VISIBLE_DEVICES=0
 8 | 
 9 | python train.py --actor_num 300 \
10 |            --difficulty 3 \
11 |            --vel_penalty_coeff 3.0 \
12 |            --penalty_coeff 2.0 \
13 |            --rpm_size 6e6 \
14 |            --train_times 250 \
15 |            --logdir ./output/difficulty3 \
16 |            --restore_model_path $1
17 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/train_difficulty3_first_target.sh:
--------------------------------------------------------------------------------
 1 | if [ $# != 1 ]; then
 2 |     echo "Usage: sh train_difficulty3_first_target.sh [RESTORE_MODEL_PATH]" 
 3 |     exit 1
 4 | fi
 5 | 
 6 | # select which GPU to use
 7 | export CUDA_VISIBLE_DEVICES=0
 8 | 
 9 | python train.py --actor_num 300 \
10 |            --difficulty 3 \
11 |            --vel_penalty_coeff 3.0 \
12 |            --penalty_coeff 3.0 \
13 |            --only_first_target \
14 |            --logdir ./output/difficulty3_first_target \
15 |            --restore_model_path $1
16 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/README.md:
--------------------------------------------------------------------------------
 1 | ## NeurIPS2020 L2RPN Challenge
 2 | 
 3 | The **PARL** team won first place in both tracks (the Robustness Track and the Adaptability Track) of the *NeurIPS2020 Learning-to-Run-a-Power-Network* challenge!
 4 | 
 5 | <p align="center">
 6 | <img src="images/l2rpn.jpeg" alt="PARL" height="300" />
 7 | </p>
 8 | 
 9 | ## Paper Citation
10 | 
11 | If you use our code in your experiments or find it helpful, please consider citing the following paper:
12 | 
13 | <pre>
14 | @inproceedings{Zhou2021ActionSB,
15 |   title={Action Set Based Policy Optimization for Safe Power Grid Management},
16 |   author={Bo Zhou and Hongsheng Zeng and Yuecheng Liu and Kejiao Li and Fan Wang and Hao Tian},
17 |   journal={ECML PKDD2021},
18 |   year={2021}
19 | }
20 | </pre>
21 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/images/l2rpn.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/images/l2rpn.jpeg


--------------------------------------------------------------------------------
/examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/track1/README.md:
--------------------------------------------------------------------------------
 1 | ## Dependencies
 2 | - python3.6
 3 | - [parl==1.3.2](https://github.com/PaddlePaddle/PARL)
 4 | - [paddlepaddle==1.6.1](https://github.com/PaddlePaddle/Paddle)
 5 | - [grid2op==1.2.2](https://github.com/rte-france/Grid2Op)
 6 | - [lightsim2grid==0.2.4](https://github.com/BDonnot/lightsim2grid)
 7 | 
 8 | ## How to evaluate
 9 |   1. Clone the repository.
10 |   2. Download the saved models from online storage service: [Baidu Pan](https://pan.baidu.com/s/1nqrIDomycy3D4OINSQV-8w) (password: `4801`) or [Google Drive](https://drive.google.com/file/d/1hq4Xf_xywrm3I-1bJNQt_QKrOi8HJrrr/view?usp=sharing)
11 |   3. Unpack the file:  
12 |     ```
13 |     tar -xvzf saved_files.tar.gz
14 |     ```
15 |   4. Evaluate the result:  
16 |     ```
17 |     python evaluate.py --num_episodes=10
18 |     ```
19 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/track2/README.md:
--------------------------------------------------------------------------------
 1 | ## Dependencies
 2 | - python3.6
 3 | - [parl==1.3.2](https://github.com/PaddlePaddle/PARL)
 4 | - [paddlepaddle==1.6.1](https://github.com/PaddlePaddle/Paddle)
 5 | - [grid2op==1.2.2](https://github.com/rte-france/Grid2Op)
 6 | - [lightsim2grid==0.2.4](https://github.com/BDonnot/lightsim2grid)
 7 | 
 8 | ## How to evaluate
 9 |   1. Clone the repository.
10 |   2. Download the saved models from online storage service: [Baidu Pan](https://pan.baidu.com/s/1sFR17yMUEsXtVyuomUkutw) (password: `uvi6`) or [Google Drive](https://drive.google.com/file/d/1vH52Z2DhSj5Vpk9pBhHKZk0qdqiJFXZU/view?usp=sharing)
11 |   3. Unpack the file:  
12 | 	```
13 | 	tar -zxvf saved_files.tar.gz
14 | 	```
15 |   4. Evaluate the result:  
16 | 	```
17 | 	python evaluate.py --num_episodes=10
18 | 	```
19 | 


--------------------------------------------------------------------------------
/examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/track2/es.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import parl
16 | 
17 | __all__ = ['ES']
18 | 
19 | 
20 | class ES(parl.Algorithm):
21 |     def __init__(self, model):
22 |         """ES algorithm.
23 |         
24 |         Since the parameters of the model are updated at the numpy level, a `learn` function is not needed
25 |         in this algorithm.
26 | 
27 |         Args:
28 |             model(`parl.Model`): policy model of ES algorithm.
29 |         """
30 |         self.model = model
31 | 
32 |     def predict(self, obs):
33 |         """Use the policy model to predict actions of observations.
34 | 
35 |         Args:
36 |             obs(layers.data):  data layer of observations.
37 |         Returns:
38 |             tensor of predicted actions.
39 |         """
40 |         return self.model.predict(obs)
41 | 


--------------------------------------------------------------------------------
/examples/OAC/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce OAC with PARL
 2 | Based on PARL, we have reproduced the OAC deep reinforcement learning algorithm, reaching the same level of performance as reported in the paper on Mujoco benchmarks.
 3 | 
 4 | > Paper: OAC in [Better Exploration with Optimistic Actor-Critic](https://arxiv.org/abs/1910.12807)
 5 | 
 6 | ### Mujoco games introduction
 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download the Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco).
 8 | 
 9 | ### Benchmark result
10 | 
11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/OAC/paddle/result.png" width="600" alt="OAC_results"/>
12 | 
13 | ## How to use
14 | ### Dependencies:
15 | + python3.7+
16 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
17 | + [paddlepaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle)
18 | + gym>=0.26.0
19 | + mujoco>=2.2.2
20 | 
21 | ### Start Training:
22 | ```
23 | # To train an agent for the Humanoid-v4 game
24 | python train.py
25 | 
26 | # To train for another game
27 | python train.py --env [ENV_NAME]
28 | ```


--------------------------------------------------------------------------------
/examples/OAC/requirements.txt:
--------------------------------------------------------------------------------
1 | parl>=2.1.1
2 | paddlepaddle>=2.0.0
3 | gym>=0.26.0
4 | mujoco>=2.2.2
5 | 


--------------------------------------------------------------------------------
/examples/PPO/requirements_atari.txt:
--------------------------------------------------------------------------------
1 | gym==0.18.0
2 | paddlepaddle>=2.0.0
3 | parl>=2.1.1
4 | atari-py==0.2.6
5 | opencv-python
6 | 


--------------------------------------------------------------------------------
/examples/PPO/requirements_mujoco.txt:
--------------------------------------------------------------------------------
1 | gym>=0.26.0
2 | mujoco==2.2.2
3 | paddlepaddle>=2.0.0
4 | parl>=2.1.1
5 | 


--------------------------------------------------------------------------------
/examples/QMIX/README.md:
--------------------------------------------------------------------------------
 1 | ## QMIX based on PARL and PaddlePaddle2.0
 2 | We reproduce QMIX based on **PARL** and **PaddlePaddle>=2.0.0**, reaching the same level of performance as reported in the paper on StarCraft II benchmarks.
 3 | ### QMIX
 4 | QMIX is a **value-based** multi-agent reinforcement learning algorithm.  
 5 | Learn more about QMIX from: [QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1803.11485)
 6 | ### StarCraft2 Environment
 7 | Paper: [The StarCraft Multi-Agent Challenge](https://arxiv.org/pdf/1902.04043)  
 8 | Github Repositories: [smac](https://github.com/oxwhirl/smac)  
 9 | ## Benchmark Results
10 | <img src="images/paddle2.0_qmix_result.png" width = "700"  alt="Performance" />  
11 | 
12 | - We trained our model in 5 different scenarios: *"3m", "8m", "2s_3z", "3s_5z"* and *"1c_3s_5z"*.
13 | - The **difficulty** in all scenarios is set to "7" (very difficult).  
14 | - We trained our model 3 times for each scenario.
15 | 
16 | ## How to Use
17 | ### Dependencies
18 | - python3.6+
19 | - [parl>=2.0.0](https://github.com/PaddlePaddle/PARL)
20 | - [smac](https://github.com/oxwhirl/smac)
21 | - [PaddlePaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle)
22 | 
23 | ### Start Training
24 | 1. Modify the config in `qmix_config.py`.
25 | 2. Start training:
26 |     ```bash
27 |     python train.py
28 |     ```
29 | 3. View the training process with tensorboard:
30 |     ```bash
31 |     tensorboard --logdir ./
32 |     ```
33 | 


--------------------------------------------------------------------------------
/examples/QMIX/images/paddle2.0_qmix_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/QMIX/images/paddle2.0_qmix_result.png


--------------------------------------------------------------------------------
/examples/QMIX/requirements.txt:
--------------------------------------------------------------------------------
1 | PaddlePaddle>=2.0.0
2 | smac
3 | parl>=2.0.0
4 | 


--------------------------------------------------------------------------------
/examples/QuickStart/README.md:
--------------------------------------------------------------------------------
 1 | ## Paddle benchmark Quick Start
 2 | Train an agent with PARL to solve the CartPole problem, a classical benchmark in RL.
 3 | 
 4 | ## How to use
 5 | ### Dependencies:
 6 | 
 7 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
 8 | + paddlepaddle>=2.0.0
 9 | + gym
10 | + pygame
11 | 
12 | ### Start Training:
13 | ```
14 | python train.py  
15 | ```
16 | 
17 | ### Expected Result
18 | <img src="performance.gif" width = "300" height ="200" alt="result"/>
19 | 
20 | The agent can get around 200 points in a few seconds.
21 | 


--------------------------------------------------------------------------------
/examples/QuickStart/cartpole_model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import paddle
16 | import paddle.nn as nn
17 | import paddle.nn.functional as F
18 | import parl
19 | 
20 | 
21 | class CartpoleModel(parl.Model):
22 |     """ Linear network to solve Cartpole problem.
23 |     
24 |     Args:
25 |         obs_dim (int): Dimension of observation space.
26 |         act_dim (int): Dimension of action space.
27 |     """
28 | 
29 |     def __init__(self, obs_dim, act_dim):
30 |         super(CartpoleModel, self).__init__()
31 |         hid1_size = act_dim * 10
32 |         self.fc1 = nn.Linear(obs_dim, hid1_size)
33 |         self.fc2 = nn.Linear(hid1_size, act_dim)
34 | 
35 |     def forward(self, x):
36 |         out = paddle.tanh(self.fc1(x))
37 |         prob = F.softmax(self.fc2(out), axis=-1)
38 |         return prob
39 | 
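A quick sketch of querying the model directly (CartPole-v0 has a 4-dimensional observation space and 2 discrete actions):

```
import paddle

model = CartpoleModel(obs_dim=4, act_dim=2)
obs = paddle.randn([1, 4])
prob = model(obs)         # action probabilities, shape [1, 2]
print(float(prob.sum()))  # each softmax row sums to 1
```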


--------------------------------------------------------------------------------
/examples/QuickStart/performance.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/QuickStart/performance.gif


--------------------------------------------------------------------------------
/examples/QuickStart/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | pygame
3 | parl>=2.1.1
4 | paddlepaddle>=2.0.0
5 | 


--------------------------------------------------------------------------------
/examples/SAC/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce SAC with PARL
 2 | Based on PARL, we have reproduced the SAC deep reinforcement learning algorithm, reaching the same level of performance as reported in the paper on Mujoco benchmarks.
 3 | 
 4 | > Paper: SAC in [Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor](https://arxiv.org/abs/1801.01290)
 5 | 
 6 | ### Mujoco games introduction
 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download the Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco).
 8 | 
 9 | ### Benchmark result
10 | 
11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/SAC/paddle/result.png" alt="SAC_results"/>
12 | 
13 | + Each experiment was run three times with different seeds
14 | 
15 | ## How to use
16 | ### Dependencies:
17 | + python3.7+
18 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
19 | + [paddlepaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle)
20 | + gym>=0.26.0
21 | + mujoco>=2.2.2
22 | 
23 | ### Start Training:
24 | #### Train
25 | ```
26 | # To train on HalfCheetah-v4 (default), Hopper-v4, Walker2d-v4, or Ant-v4
27 | # --alpha 0.2 (default)
28 | python train.py --env [ENV_NAME]
29 | 
30 | # To reproduce the performance of Humanoid-v4
31 | python train.py --env Humanoid-v4 --alpha 0.05
32 | ```


--------------------------------------------------------------------------------
/examples/SAC/requirements.txt:
--------------------------------------------------------------------------------
1 | parl>=2.1.1
2 | paddlepaddle>=2.0.0
3 | gym>=0.26.0
4 | mujoco>=2.2.2
5 | 


--------------------------------------------------------------------------------
/examples/TD3/README.md:
--------------------------------------------------------------------------------
 1 | ## Reproduce TD3 with PARL
 2 | Based on PARL, we have reproduced the TD3 deep reinforcement learning algorithm, reaching the same level of performance as reported in the paper on Mujoco benchmarks.
 3 | 
 4 | It includes the following improvements:
 5 | + Clipped Double Q-learning
 6 | + Target Networks and Delayed Policy Update
 7 | + Target Policy Smoothing Regularization
 8 | 
 9 | > TD3 in
10 | [Addressing Function Approximation Error in Actor-Critic Methods](https://arxiv.org/abs/1802.09477)
11 | 
12 | ### Mujoco games introduction
13 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download the Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco).
14 | 
15 | ### Benchmark result
16 | 
17 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/TD3/paddle/result.png" alt="TD3_results"/>
18 | + Each experiment was run three times with different seeds
19 | 
20 | ## How to use
21 | ### Dependencies:
22 | + python3.7+
23 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL)
24 | + [paddlepaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle)
25 | + gym>=0.26.0
26 | + mujoco>=2.2.2
27 | 
28 | ### Start Training:
29 | ```
30 | # To train an agent for the HalfCheetah-v4 game
31 | python train.py
32 | 
33 | # To train for a different game
34 | python train.py --env [ENV_NAME]
35 | ```


--------------------------------------------------------------------------------
/examples/TD3/requirements.txt:
--------------------------------------------------------------------------------
1 | parl>=2.1.1
2 | paddlepaddle>=2.0.0
3 | gym>=0.26.0
4 | mujoco>=2.2.2
5 | 


--------------------------------------------------------------------------------
/examples/tutorials/README.md:
--------------------------------------------------------------------------------
 1 | ## Course Examples for "PARL强化学习入门实践" (Hands-on Introduction to RL with PARL)
 2 | 
 3 | For reinforcement learning beginners, PARL provides an [introductory course](https://aistudio.baidu.com/aistudio/course/introduce/1335) with code examples for the five most fundamental RL algorithms (note: all examples in this course are written with the **static-graph framework**).
 4 | 
 5 | ## News:
 6 | + [2021.12.06] By popular request, we now provide a **dynamic-graph** version of the course code for lesson3-lesson5; see [this link](parl2_dygraph/). lesson1-lesson2 do not involve neural networks, so the code in this directory still applies.
 7 | 
 8 | ## Course Outline
 9 | + 1. First impressions of reinforcement learning (RL)
10 |     + RL overview and getting-started roadmap
11 |     + Hands-on: environment setup (the code in [lesson1](lesson1/gridworld.py) provides a rendering wrapper for the grid-world environment)
12 | + 2. Solving RL with tabular methods
13 |     + MDPs, state values, Q-tables
14 |     + Hands-on: [Sarsa](lesson2/sarsa), [Q-learning](lesson2/q_learning)
15 | + 3. Solving RL with neural networks
16 |     + Function approximation methods
17 |     + Hands-on: [DQN](lesson3/dqn)
18 | + 4. Solving RL with policy gradients
19 |     + Policy approximation, policy gradient
20 |     + Hands-on: [Policy Gradient](lesson4/policy_gradient)
21 | + 5. Solving RL in continuous action spaces
22 |     + Hands-on: [DDPG](lesson5/ddpg)
23 | 
24 | 
25 | 
26 | ## Usage
27 | 
28 | ### Install dependencies (note: please install the exact versions listed)
29 | 
30 | + Python 3.6/3.7
31 | + [paddlepaddle](https://github.com/PaddlePaddle/Paddle)==1.8.5
32 | + [parl](https://github.com/PaddlePaddle/PARL)==1.3.1 or parl==1.4
33 | + gym==0.18.0
34 | + atari-py==0.2.6 (required only for the lesson4 homework)
35 | + rlschool==0.3.1 (required only for the lesson5 homework)
36 | 
37 | You can simply install the `requirements.txt` in this directory to get the matching versions of all the dependencies above.
38 | ```
39 | pip install -r requirements.txt
40 | ```
41 | 
42 | ### Run the examples
43 | 
44 | Enter each example's code folder and run
45 | ```
46 | python train.py
47 | ```
48 | 


--------------------------------------------------------------------------------
/examples/tutorials/homework/lesson3/dqn_mountaincar/model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | #-*- coding: utf-8 -*-
16 | 
17 | import parl
18 | from parl import layers  # wraps the paddle.fluid.layers API
19 | 
20 | 
21 | class Model(parl.Model):
22 |     def __init__(self, act_dim):
23 |         hid1_size = 128
24 |         hid2_size = 128
25 |         # 3-layer fully connected network
26 |         self.fc1 = layers.fc(size=hid1_size, act='relu')
27 |         self.fc2 = layers.fc(size=hid2_size, act='relu')
28 |         self.fc3 = layers.fc(size=act_dim, act=None)
29 | 
30 |     def value(self, obs):
31 |         h1 = self.fc1(obs)
32 |         h2 = self.fc2(h1)
33 |         Q = self.fc3(h2)
34 |         return Q
35 | 


--------------------------------------------------------------------------------
/examples/tutorials/homework/lesson4/policy_gradient_pong/model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | #-*- coding: utf-8 -*-
16 | 
17 | import parl
18 | from parl import layers
19 | 
20 | 
21 | class Model(parl.Model):
22 |     def __init__(self, act_dim):
23 |         act_dim = act_dim
24 |         hid1_size = 256
25 |         hid2_size = 64
26 | 
27 |         self.fc1 = layers.fc(size=hid1_size, act='relu')
28 |         self.fc2 = layers.fc(size=hid2_size, act='relu')
29 |         self.fc3 = layers.fc(size=act_dim, act='softmax')
30 | 
31 |     def forward(self, obs):
32 |         h1 = self.fc1(obs)
33 |         h2 = self.fc2(h1)
34 |         out = self.fc3(h2)
35 |         return out
36 | 


--------------------------------------------------------------------------------
/examples/tutorials/lesson3/dqn/model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | #-*- coding: utf-8 -*-
16 | 
17 | import parl
18 | from parl import layers  # wraps the paddle.fluid.layers API
19 | 
20 | 
21 | class Model(parl.Model):
22 |     def __init__(self, act_dim):
23 |         hid1_size = 128
24 |         hid2_size = 128
25 |         # 3-layer fully connected network
26 |         self.fc1 = layers.fc(size=hid1_size, act='relu')
27 |         self.fc2 = layers.fc(size=hid2_size, act='relu')
28 |         self.fc3 = layers.fc(size=act_dim, act=None)
29 | 
30 |     def value(self, obs):
31 |         h1 = self.fc1(obs)
32 |         h2 = self.fc2(h1)
33 |         Q = self.fc3(h2)
34 |         return Q
35 | 


--------------------------------------------------------------------------------
/examples/tutorials/lesson4/policy_gradient/model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | #-*- coding: utf-8 -*-
16 | 
17 | import parl
18 | from parl import layers
19 | 
20 | 
21 | class Model(parl.Model):
22 |     def __init__(self, act_dim):
23 |         act_dim = act_dim
24 |         hid1_size = act_dim * 10
25 | 
26 |         self.fc1 = layers.fc(size=hid1_size, act='tanh')
27 |         self.fc2 = layers.fc(size=act_dim, act='softmax')
28 | 
29 |     def forward(self, obs):  # can be called directly, e.g. model = Model(5); model(obs)
30 |         out = self.fc1(obs)
31 |         out = self.fc2(out)
32 |         return out
33 | 


--------------------------------------------------------------------------------
/examples/tutorials/parl2_dygraph/README.md:
--------------------------------------------------------------------------------
 1 | ## "Hands-on Introduction to Reinforcement Learning with PARL" Course Examples (Dynamic-Graph Version)
 2 | + By popular demand, we provide **dynamic-graph** versions of the course code for lesson3-lesson5; lesson1 and lesson2 do not involve neural networks, so the code in the parent directory can be used as-is.
 3 | 
 4 | ## Code Outline
 5 | + `lesson3`: solving RL with neural-network methods
 6 |     + dqn: solve CartPole with the DQN algorithm.
 7 |     + homework: solve MountainCar with the DQN algorithm.
 8 | + `lesson4`: solving RL with policy gradients
 9 |     + policy_gradient: solve CartPole with the PG algorithm.
10 |     + homework: solve the Atari Pong environment with the PG algorithm.
11 | + `lesson5`: solving RL in continuous action spaces
12 |     + ddpg: solve the continuous-action version of CartPole with the DDPG algorithm.
13 |     + homework: solve the quadrotor hovering task with the DDPG algorithm.
14 | 
15 | 
16 | ## Usage
17 | 
18 | ### Install dependencies (note: please install exactly these versions)
19 | 
20 | + Python 3.6/3.7/3.8
21 | + [paddlepaddle](https://github.com/PaddlePaddle/Paddle)==2.2.0
22 | + [parl](https://github.com/PaddlePaddle/PARL)==2.0.3
23 | + gym==0.18.0
24 | + atari-py==0.2.6 (only needed for the lesson4 homework)
25 | + rlschool==0.3.1 (only needed for the lesson5 homework)
26 | 
27 | You can simply install the `requirements.txt` in this directory to get all of the versions above.
28 | ```
29 | pip install -r requirements.txt
30 | ```
31 | 
32 | ### Run an example
33 | 
34 | Enter the code folder of the example and run
35 | ```
36 | python train.py
37 | ```
38 | 


--------------------------------------------------------------------------------
/examples/tutorials/parl2_dygraph/lesson4/policy_gradient/model.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | #-*- coding: utf-8 -*-
16 | 
17 | import paddle
18 | import paddle.nn as nn
19 | import paddle.nn.functional as F
20 | import parl
21 | 
22 | 
23 | class Model(parl.Model):
24 |     """ A fully-connected network.
25 | 
26 |     Args:
27 |         obs_dim (int): dimension of the observation space.
28 |         act_dim (int): dimension of the action space.
29 |     """
30 | 
31 |     def __init__(self, obs_dim, act_dim):
32 |         super(Model, self).__init__()
33 |         hid1_size = act_dim * 10
34 |         self.fc1 = nn.Linear(obs_dim, hid1_size)
35 |         self.fc2 = nn.Linear(hid1_size, act_dim)
36 | 
37 |     def forward(self, x):  # can be called directly: model = Model(obs_dim, act_dim); model(obs)
38 |         out = paddle.tanh(self.fc1(x))
39 |         prob = F.softmax(self.fc2(out), axis=-1)
40 |         return prob
41 | 
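
Because this is a dynamic-graph model, it can be exercised directly with an eager tensor. A quick smoke test with the `Model` class above in scope, assuming CartPole-like dimensions (4 observations, 2 actions):

```
import paddle

model = Model(obs_dim=4, act_dim=2)
obs = paddle.randn([1, 4], dtype='float32')
prob = model(obs)          # shape [1, 2]
print(prob.sum(axis=-1))   # each row sums to 1 thanks to the softmax
```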


--------------------------------------------------------------------------------
/examples/tutorials/parl2_dygraph/requirements.txt:
--------------------------------------------------------------------------------
1 | # requirements for tutorials (paddle dygraph version)
2 | paddlepaddle==2.2.0
3 | parl==2.0.3
4 | gym==0.18.0
5 | atari-py==0.2.6
6 | rlschool==0.3.1
7 | 


--------------------------------------------------------------------------------
/examples/tutorials/requirements.txt:
--------------------------------------------------------------------------------
1 | # requirements for tutorials (paddle fluid version)
2 | paddlepaddle==1.8.5
3 | parl==1.4
4 | gym==0.18.0
5 | atari-py==0.2.6
6 | rlschool==0.3.1
7 | 


--------------------------------------------------------------------------------
/papers/NeurIPS 2019 RL papers.numbers:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/papers/NeurIPS 2019 RL papers.numbers


--------------------------------------------------------------------------------
/parl/algorithms/fluid/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.algorithms.fluid.a3c import *
16 | from parl.algorithms.fluid.ddpg import *
17 | from parl.algorithms.fluid.maddpg import *
18 | from parl.algorithms.fluid.dqn import *
19 | from parl.algorithms.fluid.ddqn import *
20 | from parl.algorithms.fluid.policy_gradient import *
21 | from parl.algorithms.fluid.ppo import *
22 | from parl.algorithms.fluid.td3 import *
23 | from parl.algorithms.fluid.sac import *
24 | from parl.algorithms.fluid.impala.impala import *
25 | from parl.algorithms.fluid.qmix import *
26 | 


--------------------------------------------------------------------------------
/parl/algorithms/fluid/impala/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.algorithms.fluid.impala.impala import *
16 | 


--------------------------------------------------------------------------------
/parl/algorithms/paddle/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.algorithms.paddle.policy_gradient import *
16 | from parl.algorithms.paddle.td3 import *
17 | from parl.algorithms.paddle.sac import *
18 | from parl.algorithms.paddle.dqn import *
19 | from parl.algorithms.paddle.ddpg import *
20 | from parl.algorithms.paddle.oac import *
21 | from parl.algorithms.paddle.qmix import *
22 | from parl.algorithms.paddle.a2c import *
23 | from parl.algorithms.paddle.ddqn import *
24 | from parl.algorithms.paddle.maddpg import *
25 | from parl.algorithms.paddle.ppo import *
26 | from parl.algorithms.paddle.cql import *
27 | from parl.algorithms.paddle.impala.impala import *
28 | 
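
These wildcard imports are what let user code reference algorithms as `parl.algorithms.DQN` and so on, without knowing the backend layout. A hedged sketch of wiring a model into an algorithm; the `DQN(model, gamma, lr)` signature is an assumption based on the tutorial code above, so check it against your installed version:

```
import paddle.nn as nn
import parl


class QNet(parl.Model):
    def __init__(self, obs_dim, act_dim):
        super().__init__()
        self.fc = nn.Linear(obs_dim, act_dim)

    def forward(self, obs):
        return self.fc(obs)


# Assumed signature: DQN(model, gamma, lr).
algorithm = parl.algorithms.DQN(QNet(4, 2), gamma=0.99, lr=1e-3)
```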


--------------------------------------------------------------------------------
/parl/algorithms/paddle/impala/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.algorithms.paddle.impala.impala import *
16 | 


--------------------------------------------------------------------------------
/parl/algorithms/torch/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.algorithms.torch.ddqn import *
16 | from parl.algorithms.torch.dqn import *
17 | from parl.algorithms.torch.a2c import *
18 | from parl.algorithms.torch.td3 import *
19 | from parl.algorithms.torch.coma import *
20 | from parl.algorithms.torch.ppo import *
21 | from parl.algorithms.torch.policy_gradient import *
22 | from parl.algorithms.torch.qmix import *
23 | from parl.algorithms.torch.ddpg import *
24 | from parl.algorithms.torch.sac import *
25 | from parl.algorithms.torch.oac import *
26 | from parl.algorithms.torch.maddpg import *
27 | from parl.algorithms.torch.cql import *
28 | from parl.algorithms.torch.mappo import *
29 | from parl.algorithms.torch.dt import *
30 | 


--------------------------------------------------------------------------------
/parl/core/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.core.agent_base import *
16 | from parl.core.model_base import *
17 | from parl.core.algorithm_base import *
18 | 


--------------------------------------------------------------------------------
/parl/core/fluid/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.core.fluid.model import *
16 | from parl.core.fluid.algorithm import *
17 | from parl.core.fluid.agent import *
18 | 
19 | from . import layers
20 | from . import plutils
21 | from . import policy_distribution
22 | 


--------------------------------------------------------------------------------
/parl/core/fluid/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | This file wraps Fluid layers that have parameters to support parameter sharing.
16 | For other layers that don't have parameters, we simply copy them to this namespace.
17 | """
18 | from paddle.fluid.layers import *
19 | from parl.core.fluid.layers.layer_wrappers import *
20 | 


--------------------------------------------------------------------------------
/parl/core/fluid/plutils/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.core.fluid.plutils.common import *
16 | 


--------------------------------------------------------------------------------
/parl/core/paddle/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.core.paddle.model import *
16 | from parl.core.paddle.algorithm import *
17 | from parl.core.paddle.agent import *
18 | 


--------------------------------------------------------------------------------
/parl/core/tests/model_base_test.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | from parl.core.model_base import ModelBase
17 | 
18 | 
19 | class TestBaseModel(ModelBase):
20 |     def forward(self, x, y):
21 |         return x + y
22 | 
23 | 
24 | class ModelBaseTest(unittest.TestCase):
25 |     def setUp(self):
26 |         self.model = TestBaseModel()
27 | 
28 |     def test_forward(self):
29 |         x, y = 10, 20
30 |         expected_out = x + y
31 |         forward_out = self.model(x, y)
32 |         self.assertEqual(forward_out, expected_out)
33 | 
34 | 
35 | if __name__ == '__main__':
36 |     unittest.main()
37 | 


--------------------------------------------------------------------------------
/parl/core/torch/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.core.torch.model import *
16 | from parl.core.torch.algorithm import *
17 | from parl.core.torch.agent import *
18 | 


--------------------------------------------------------------------------------
/parl/env/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.env.vector_env import *
16 | from parl.env.continuous_wrappers import *
17 | from parl.env.compat_wrappers import *
18 | 


--------------------------------------------------------------------------------
/parl/remote/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.remote.master import *
16 | from parl.remote.worker import *
17 | from parl.remote.client import *
18 | from parl.remote.exceptions import *
19 | from parl.remote.remote_decorator import *
20 | 


--------------------------------------------------------------------------------
/parl/remote/compatible_trick.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | This file works around the problem that cloudpickle cannot load some packages normally on macOS.
16 | We hack around it by trying to load these packages in the main module in advance.
17 | 
18 | Template:
19 | 
20 | try:
21 |     import [PACKAGE1]
22 | except ImportError:
23 |     pass
24 | 
25 | try:
26 |     import [PACKAGE2]
27 | except ImportError:
28 |     pass
29 | 
30 | """
31 | from parl.utils import _IS_MAC
32 | 
33 | if _IS_MAC:
34 |     try:
35 |         import rlschool
36 |     except ImportError:
37 |         pass
38 | 


--------------------------------------------------------------------------------
/parl/remote/future_mode/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.remote.future_mode.future_object import *
16 | from parl.remote.future_mode.proxy_wrapper_nowait import *
17 | 


--------------------------------------------------------------------------------
/parl/remote/grpc_heartbeat/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.remote.grpc_heartbeat.heartbeat_server import *
16 | from parl.remote.grpc_heartbeat.heartbeat_client import *
17 | 


--------------------------------------------------------------------------------
/parl/remote/grpc_heartbeat/heartbeat.proto:
--------------------------------------------------------------------------------
 1 | //   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | syntax = "proto3";
16 | 
17 | // The heartbeat service definition.
18 | service GrpcHeartbeat {
19 |   rpc Send(Request) returns (Reply) {}
20 | }
21 | 
22 | // The request message.
23 | message Request {
24 |   string client_id = 1;
25 |   bytes tag = 2;
26 |   string instance_id = 4; // used in heartbeat detection between the job and client.
27 |   string extra_msg = 8;
28 | }
29 | 
30 | // The response message
31 | message Reply {
32 |   bytes tag = 1;
33 | }
34 | 
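
The Python stubs for this service can be regenerated with grpcio-tools. A sketch, assuming it is run from this directory with the `grpcio-tools` package installed:

```
# pip install grpcio-tools
from grpc_tools import protoc

protoc.main([
    'grpc_tools.protoc',
    '-I.',
    '--python_out=.',
    '--grpc_python_out=.',
    'heartbeat.proto',
])
```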


--------------------------------------------------------------------------------
/parl/remote/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/parl/remote/static/favicon.ico


--------------------------------------------------------------------------------
/parl/remote/static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/parl/remote/static/logo.png


--------------------------------------------------------------------------------
/parl/remote/tests/client_not_init_test.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import unittest
15 | import parl
16 | 
17 | 
18 | @parl.remote_class
19 | class Actor(object):
20 |     pass
21 | 
22 | 
23 | class TestClient(unittest.TestCase):
24 |     def test_not_init(self):
25 |         """client is expected to raise an error and say that the master has not been started"""
26 | 
27 |         def create_actor():
28 |             actor = Actor()
29 | 
30 |         self.assertRaises(AssertionError, create_actor)
31 | 
32 | 
33 | if __name__ == '__main__':
34 |     unittest.main()
35 | 


--------------------------------------------------------------------------------
/parl/remote/tests/mocking_env_test.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from parl.remote.test_utils import MockingEnv
15 | from parl.remote.test_utils import env_config_for_test
16 | import unittest
17 | import parl
18 | 
19 | 
20 | @env_config_for_test
21 | class TestMockingEnv(MockingEnv):
22 |     def test_gpu_env(self):
23 |         ret = parl.utils.machine_info.is_gpu_available()
24 |         assert ret == self.return_value
25 | 
26 | 
27 | if __name__ == '__main__':
28 |     unittest.main()
29 | 


--------------------------------------------------------------------------------
/parl/remote/tests/rom/pong.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/parl/remote/tests/rom/pong.bin


--------------------------------------------------------------------------------
/parl/remote/tests/simulate_client.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import time
15 | import parl
16 | 
17 | 
18 | @parl.remote_class
19 | class Actor(object):
20 |     def add_one(self, value):
21 |         value += 1
22 |         return value
23 | 
24 | 
25 | def train():
26 |     # reset_job_test.py executes simulate_client.py, so both files must use the same port
27 |     parl.connect('localhost:1337')  # cannot use get_free_tcp_port() here
28 |     actor = Actor()
29 |     actor.add_one(1)
30 |     time.sleep(100000)
31 | 
32 | 
33 | if __name__ == '__main__':
34 |     train()
35 | 


--------------------------------------------------------------------------------
/parl/remote/tests/test_import_module/Module2.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import parl
15 | 
16 | 
17 | @parl.remote_class
18 | class B(object):
19 |     def add_sum(self, a, b):
20 |         return a + b
21 | 


--------------------------------------------------------------------------------
/parl/remote/tests/test_import_module/main_abs_test.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import unittest
15 | import parl
16 | from parl.utils.test_utils import XparlTestCase
17 | 
18 | class TestImport(XparlTestCase):
19 |     def test_import_local_module(self):
20 |         from Module2 import B
21 |         self.add_master()
22 |         self.add_worker(n_cpu=1)
23 |         parl.connect("localhost:{}".format(self.port))
24 |         obj = B()
25 |         res = obj.add_sum(10, 5)
26 |         self.assertEqual(res, 15)
27 | 
28 | if __name__ == '__main__':
29 |     unittest.main(failfast=True)
30 | 


--------------------------------------------------------------------------------
/parl/remote/tests/test_import_module/subdir/Module.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import parl
15 | 
16 | 
17 | @parl.remote_class
18 | class A(object):
19 |     def add_sum(self, a, b):
20 |         return a + b
21 | 


--------------------------------------------------------------------------------
/parl/remote/tests/test_import_module/subdir/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/parl/tests/import_test.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | 
18 | class ImportTest(unittest.TestCase):
19 |     def test_import_parl_alone(self):
20 |         import parl
21 | 
22 | 
23 | if __name__ == '__main__':
24 |     unittest.main()
25 | 


--------------------------------------------------------------------------------
/parl/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from parl.utils.exceptions import *
16 | from parl.utils.utils import *
17 | from parl.utils.csv_logger import *
18 | from parl.utils.machine_info import *
19 | from parl.utils.np_utils import *
20 | from parl.utils.replay_memory import *
21 | from parl.utils.rl_utils import *
22 | from parl.utils.scheduler import *
23 | from parl.utils.path_utils import *
24 | from parl.utils.globvars import *
25 | 


--------------------------------------------------------------------------------
/parl/utils/np_utils.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numpy as np
16 | 
17 | __all__ = ['np_softmax', 'np_cross_entropy']
18 | 
19 | 
20 | def np_softmax(logits):
21 |     return np.exp(logits) / np.sum(np.exp(logits), axis=-1, keepdims=True)
22 | 
23 | 
24 | def np_cross_entropy(probs, labels):
25 |     if labels.shape[-1] == 1:
26 |         # sparse label
27 |         n_classes = probs.shape[-1]
28 |         result_shape = list(labels.shape[:-1]) + [n_classes]
29 |         labels = np.eye(n_classes)[labels.reshape(-1)]
30 |         labels = labels.reshape(result_shape)
31 | 
32 |     return -np.sum(labels * np.log(probs), axis=-1, keepdims=True)
33 | 
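
Both helpers accept batched inputs, and `np_cross_entropy` treats a trailing dimension of 1 as sparse (integer) labels. Note that `np_softmax` does not subtract the per-row max, so very large logits can overflow. A quick check, with the module above in scope, that the dense and sparse label paths agree:

```
import numpy as np

logits = np.array([[2.0, 1.0, 0.1]])
probs = np_softmax(logits)           # rows sum to 1
dense = np.array([[0.0, 1.0, 0.0]])  # one-hot labels
sparse = np.array([[1]])             # sparse labels, trailing dim of 1
assert np.allclose(np_cross_entropy(probs, dense),
                   np_cross_entropy(probs, sparse))
```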


--------------------------------------------------------------------------------
/parl/utils/path_utils.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | __all__ = ['format_uniform_path']
16 | 
17 | import os
18 | 
19 | 
20 | def format_uniform_path(path):
21 |     """Normalize the path so that all separators are os.sep.
22 |     """
23 |     path = path.replace("//", os.sep)
24 |     path = path.replace("/", os.sep)
25 |     path = path.replace("\\", os.sep)
26 |     return path
27 | 
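
The effect depends on the host OS, since `//`, `/`, and `\` are all rewritten to `os.sep`. For example:

```
from parl.utils.path_utils import format_uniform_path

# Result uses one consistent separator: 'logs/exp1/model/checkpoint' on
# Linux/macOS, 'logs\\exp1\\model\\checkpoint' on Windows.
print(format_uniform_path('logs//exp1/model\\checkpoint'))
```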


--------------------------------------------------------------------------------
/parl/utils/summary.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | try:
16 |     from parl.utils.visualdl import *
17 | except ImportError:
18 |     from parl.utils.tensorboard import *
19 | 
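
Whichever backend wins the import race, the module exposes the same writer-style interface. A sketch, assuming an `add_scalar(tag, value, step)` API mirroring the visualdl/tensorboard writers:

```
from parl.utils import summary

# Assumed API: add_scalar(tag, value, step), as in visualdl/tensorboard.
for step in range(3):
    summary.add_scalar('train/episode_reward', 10.0 * step, step)
```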


--------------------------------------------------------------------------------
/parl/utils/tests/globvar_test.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | import argparse
17 | from parl.utils import logger
18 | from parl.utils import global_config as config
19 | 
20 | 
21 | class TestGlobalConfig(unittest.TestCase):
22 |     def test_load_argument(self):
23 |         # `parser` is created in the __main__ block below, so this test
24 |         # only works when the file is run directly as a script.
25 |         config.load_argument(parser.parse_args())
26 |         self.assertEqual(config.embed_dim, 128)
27 |         self.assertEqual(config.n_head, 1)
28 | 
29 | if __name__ == '__main__':
30 |     parser = argparse.ArgumentParser()
31 |     parser.add_argument('--embed_dim', type=int, default=128)
32 |     parser.add_argument('--n_head', type=int, default=1)
33 |     unittest.main()
34 | 


--------------------------------------------------------------------------------
/parl/utils/tests/not_import_dl_framework_test.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | import sys
17 | import os
18 | import importlib
19 | 
20 | class TestNotImportPaddle(unittest.TestCase):
21 |     def test_import(self):
22 |         # setting this environment variable stops parl from importing the deep learning framework
23 |         os.environ['XPARL_igonre_core'] = 'true'  # note: the misspelled name matches what parl checks
24 |         import parl
25 |         self.assertFalse('paddle' in sys.modules)
26 |         # remove the environment variable and re-import the library
27 |         del os.environ['XPARL_igonre_core']
28 |         importlib.reload(parl)
29 |         self.assertTrue('paddle' in sys.modules)
30 | 
31 | if __name__ == '__main__':
32 |     unittest.main()
33 | 


--------------------------------------------------------------------------------
/parl/utils/tests/not_import_dl_framework_test_torch.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | import sys
17 | import os
18 | import importlib
19 | 
20 | class TestNotImportTorch(unittest.TestCase):
21 |     def test_import(self):
22 |         # setting this environment variable stops parl from importing the deep learning framework
23 |         os.environ['XPARL_igonre_core'] = 'true'  # note: the misspelled name matches what parl checks
24 |         import parl
25 |         self.assertFalse('torch' in sys.modules)
26 |         # remove the environment variable and re-import the library
27 |         del os.environ['XPARL_igonre_core']
28 |         importlib.reload(parl)
29 |         self.assertTrue('torch' in sys.modules)
30 | 
31 | if __name__ == '__main__':
32 |     unittest.main()
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/A2C/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:A2C
 3 | python:python3.7
 4 | gpu_list:-1
 5 | null:null
 6 | --max_sample_steps:lite_train_lite_infer=10
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/A2C/train.py
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/CQL/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:CQL
 3 | python:python3.7
 4 | gpu_list:-1
 5 | null:null
 6 | --train_total_steps:lite_train_lite_infer=10
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/CQL/train.py
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/DDPG/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:DDPG
 3 | python:python3.7
 4 | gpu_list:-1
 5 | null:null
 6 | --train_total_steps:lite_train_lite_infer=10001
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/DDPG/train.py
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/DQN/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:DQN
 3 | python:python3.7
 4 | gpu_list:-1
 5 | null:null
 6 | --max_episode:lite_train_lite_infer=3
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/DQN/train.py
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/DQN_variant/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:DQN_variant
 3 | python:python3.7
 4 | gpu_list:-1
 5 | null:null
 6 | --train_total_steps:lite_train_lite_infer=1
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/DQN_variant/train.py
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/ES/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:ES
 3 | python:python3.7
 4 | gpu_list:-1
 5 | null:null
 6 | --train_steps:lite_train_lite_infer=1
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/ES/train.py --actor_num 2
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/MADDPG/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:MADDPG
 3 | python:python3.7
 4 | gpu_list:0
 5 | null:null
 6 | --max_episode:lite_train_lite_infer=3
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/MADDPG/train.py
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/OAC/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:OAC
 3 | python:python3.7
 4 | gpu_list:-1
 5 | null:null
 6 | --train_total_steps:lite_train_lite_infer=10001
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/OAC/train.py
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/PPO/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:PPO
 3 | python:python3.7
 4 | gpu_list:-1
 5 | null:null
 6 | --train_total_steps:lite_train_lite_infer=5000
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/PPO/train.py
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/QuickStart/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:QuickStart
 3 | python:python3.7
 4 | gpu_list:0
 5 | null:null
 6 | --max_episode:lite_train_lite_infer=3
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/QuickStart/train.py
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/SAC/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:SAC
 3 | python:python3.7
 4 | gpu_list:-1
 5 | null:null
 6 | --train_total_steps:lite_train_lite_infer=10001
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/SAC/train.py
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/configs/TD3/train_infer_python.txt:
--------------------------------------------------------------------------------
 1 | ===========================train_params===========================
 2 | model_name:TD3
 3 | python:python3.7
 4 | gpu_list:-1
 5 | null:null
 6 | --train_total_steps:lite_train_lite_infer=10001
 7 | null:null
 8 | null:null
 9 | null:null
10 | train_model_name:null
11 | null:null
12 | ##
13 | trainer:norm_train
14 | norm_train:examples/TD3/train.py
15 | ##
16 | ===========================eval_params===========================
17 | eval:null
18 | ##
19 | ===========================infer_params===========================
20 | null:null
21 | null:null
22 | null:null
23 | ##
24 | null:null
25 | null:null
26 | ##
27 | null:null
28 | null:null
29 | null:null
30 | null:null
31 | null:null
32 | null:null
33 | 


--------------------------------------------------------------------------------
/test_tipc/docs/test_tipc_log.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/test_tipc/docs/test_tipc_log.png


--------------------------------------------------------------------------------