├── .copyright.hook ├── .github ├── Aircraft.gif ├── Breakout.gif ├── Half-Cheetah.gif ├── PARL-logo.png ├── abstractions.png ├── decorator.png └── workflows │ └── unittest.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── .teamcity ├── CHANGELOG.md ├── Dockerfile ├── action_build.sh ├── build.sh ├── requirements.txt ├── requirements_fluid.txt ├── requirements_torch.txt ├── update_readme_paddle_version.py ├── windows_build.bat ├── windows_requirements_fluid.txt ├── windows_requirements_paddle.txt └── windows_test.sh ├── CMakeLists.txt ├── LICENSE ├── MANIFEST.in ├── README.cn.md ├── README.md ├── benchmark ├── fluid │ ├── A2C │ │ ├── README.md │ │ ├── a2c_config.py │ │ ├── actor.py │ │ ├── atari_agent.py │ │ ├── atari_model.py │ │ ├── learning_curve.png │ │ ├── result.png │ │ └── train.py │ ├── DDPG │ │ ├── .benchmark │ │ │ ├── DDPG_HalfCheetah-v2.png │ │ │ ├── DDPG_Hopper-v2.png │ │ │ └── DDPG_Humanoid-v2.png │ │ ├── README.md │ │ ├── mujoco_agent.py │ │ ├── mujoco_model.py │ │ └── train.py │ ├── DQN │ │ ├── README.md │ │ ├── cartpole.jpg │ │ ├── cartpole_agent.py │ │ ├── cartpole_model.py │ │ ├── replay_memory.py │ │ ├── train.py │ │ ├── train_on_xpu.md │ │ └── train_with_xpu.py │ ├── DQN_variant │ │ ├── .benchmark │ │ │ ├── merge.png │ │ │ └── table.png │ │ ├── README.md │ │ ├── atari.py │ │ ├── atari_agent.py │ │ ├── atari_model.py │ │ ├── atari_wrapper.py │ │ ├── replay_memory.py │ │ ├── rom_files │ │ │ ├── battle_zone.bin │ │ │ ├── breakout.bin │ │ │ └── pong.bin │ │ ├── train.py │ │ └── utils.py │ ├── ES │ │ ├── README.md │ │ ├── actor.py │ │ ├── es.py │ │ ├── es_config.py │ │ ├── learning_curve.png │ │ ├── mujoco_agent.py │ │ ├── mujoco_model.py │ │ ├── noise.py │ │ ├── obs_filter.py │ │ ├── optimizers.py │ │ ├── train.py │ │ └── utils.py │ ├── GA3C │ │ ├── .benchmark │ │ │ ├── GA3C_BeamRider.jpg │ │ │ ├── GA3C_Breakout.jpg │ │ │ ├── GA3C_Pong.jpg │ │ │ ├── GA3C_Qbert.jpg │ │ │ └── GA3C_SpaceInvaders.jpg │ │ ├── README.md │ │ ├── actor.py │ │ ├── atari_agent.py │ │ ├── atari_model.py │ │ ├── ga3c_config.py │ │ └── train.py │ ├── IMPALA │ │ ├── .benchmark │ │ │ ├── IMPALA_BeamRider.jpg │ │ │ ├── IMPALA_Breakout.jpg │ │ │ ├── IMPALA_Pong.jpg │ │ │ ├── IMPALA_Qbert.jpg │ │ │ └── IMPALA_SpaceInvaders.jpg │ │ ├── README.md │ │ ├── actor.py │ │ ├── atari_agent.py │ │ ├── atari_model.py │ │ ├── impala_config.py │ │ └── train.py │ ├── MADDPG │ │ ├── .benchmark │ │ │ ├── MADDPG_simple.gif │ │ │ ├── MADDPG_simple.png │ │ │ ├── MADDPG_simple_adversary.gif │ │ │ ├── MADDPG_simple_adversary.png │ │ │ ├── MADDPG_simple_crypto.png │ │ │ ├── MADDPG_simple_push.gif │ │ │ ├── MADDPG_simple_push.png │ │ │ ├── MADDPG_simple_reference.gif │ │ │ ├── MADDPG_simple_reference.png │ │ │ ├── MADDPG_simple_speaker_listener.gif │ │ │ ├── MADDPG_simple_speaker_listener.png │ │ │ ├── MADDPG_simple_spread.gif │ │ │ ├── MADDPG_simple_spread.png │ │ │ ├── MADDPG_simple_tag.gif │ │ │ ├── MADDPG_simple_tag.png │ │ │ ├── MADDPG_simple_world_comm.gif │ │ │ └── MADDPG_simple_world_comm.png │ │ ├── README.md │ │ ├── simple_agent.py │ │ ├── simple_model.py │ │ └── train.py │ ├── PPO │ │ ├── .benchmark │ │ │ ├── PPO_HalfCheetah-v2.png │ │ │ └── PPO_Hopper-v2.png │ │ ├── README.md │ │ ├── mujoco_agent.py │ │ ├── mujoco_model.py │ │ ├── scaler.py │ │ └── train.py │ ├── Prioritized_DQN │ │ ├── README.md │ │ ├── atari.py │ │ ├── atari_agent.py │ │ ├── atari_model.py │ │ ├── atari_wrapper.py │ │ ├── per_alg.py │ │ ├── proportional_per.py │ │ ├── result.png │ │ ├── rom_files │ │ ├── train.py │ │ └── 
utils.py │ ├── QMIX │ │ ├── README.md │ │ ├── env_wrapper.py │ │ ├── images │ │ │ └── paddle-qmix-result.png │ │ ├── qmix_agent.py │ │ ├── qmix_config.py │ │ ├── qmixer_model.py │ │ ├── replay_buffer.py │ │ ├── rnn_model.py │ │ ├── train.py │ │ └── utils.py │ ├── QuickStart │ │ ├── README.md │ │ ├── cartpole_agent.py │ │ ├── cartpole_model.py │ │ ├── performance.gif │ │ └── train.py │ ├── SAC │ │ ├── .benchmark │ │ │ └── merge.png │ │ ├── README.md │ │ ├── mujoco_agent.py │ │ ├── mujoco_model.py │ │ └── train.py │ ├── TD3 │ │ ├── .benchmark │ │ │ └── merge.png │ │ ├── README.md │ │ ├── mujoco_agent.py │ │ ├── mujoco_model.py │ │ └── train.py │ └── offline-Q-learning │ │ ├── README.md │ │ ├── atari.py │ │ ├── atari_agent.py │ │ ├── atari_model.py │ │ ├── atari_wrapper.py │ │ ├── dqn.py │ │ ├── parallel_run.py │ │ ├── replay_memory.py │ │ ├── rom_files │ │ └── utils.py └── torch │ ├── AlphaZero │ ├── .pic │ │ ├── good_moves.png │ │ └── perfect_moves.png │ ├── Arena.py │ ├── Coach.py │ ├── MCTS.py │ ├── README.md │ ├── actor.py │ ├── alphazero_agent.py │ ├── connect4_game.py │ ├── connect4_model.py │ ├── gen_submission.py │ ├── main.py │ ├── submission_template.py │ └── utils.py │ ├── DT │ ├── README.md │ ├── agent.py │ ├── data │ │ └── download_d4rl_datasets.py │ ├── data_loader.py │ ├── evaluate_episodes.py │ ├── model.py │ ├── train.py │ └── trajectory_gpt2.py │ ├── ES │ ├── README.md │ ├── actor.py │ ├── es.py │ ├── es_config.py │ ├── mujoco_agent.py │ ├── mujoco_model.py │ ├── noise.py │ ├── obs_filter.py │ ├── optimizers.py │ ├── train.py │ └── utils.py │ ├── NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge │ ├── README.md │ ├── track1 │ │ ├── README.md │ │ ├── evaluate.py │ │ ├── powernet_model.py │ │ ├── rl_agent.py │ │ └── utils.py │ └── track2 │ │ ├── README.md │ │ ├── evaluate.py │ │ ├── powernet_model.py │ │ ├── rl_agent.py │ │ └── utils.py │ ├── QuickStart │ ├── README.md │ ├── cartpole_agent.py │ ├── cartpole_model.py │ └── train.py │ ├── a2c │ ├── .result │ │ ├── result_a2c_torch0.png │ │ └── result_a2c_torch1.png │ ├── README.md │ ├── a2c_config.py │ ├── actor.py │ ├── atari_agent.py │ ├── atari_model.py │ └── train.py │ ├── coma │ ├── .benchmark │ │ └── 3m_result.png │ ├── README.md │ ├── coma_config.py │ ├── sc2_agent.py │ ├── sc2_model.py │ ├── starcraft2 │ │ ├── Dockerfile │ │ ├── build_docker.sh │ │ └── install_sc2.sh │ └── train.py │ ├── cql │ ├── README.md │ ├── mujoco_agent.py │ ├── mujoco_model.py │ └── train.py │ ├── ddpg │ ├── README.md │ ├── mujoco_agent.py │ ├── mujoco_model.py │ └── train.py │ ├── dqn │ ├── .benchmark │ │ └── dqn.png │ ├── README.md │ ├── agent.py │ ├── model.py │ ├── replay_memory.py │ └── train.py │ ├── iql │ ├── README.md │ ├── mujoco_agent.py │ ├── mujoco_model.py │ ├── replay_buffer.py │ └── train.py │ ├── maddpg │ ├── README.md │ ├── simple_agent.py │ ├── simple_model.py │ └── train.py │ ├── maml++ │ ├── .benchmark │ │ └── loss.png │ ├── README.md │ ├── config.py │ ├── data.py │ ├── maml_agent.py │ ├── maml_algorithm.py │ ├── maml_model.py │ └── train.py │ ├── mappo │ ├── README.md │ ├── env_wrappers.py │ ├── mappo_buffer.py │ ├── simple_agent.py │ ├── simple_model.py │ └── train.py │ ├── oac │ ├── README.md │ ├── mujoco_agent.py │ ├── mujoco_model.py │ └── train.py │ ├── ppo │ ├── README.md │ ├── agent.py │ ├── atari_config.py │ ├── atari_model.py │ ├── env_utils.py │ ├── mujoco_config.py │ ├── mujoco_model.py │ ├── storage.py │ └── train.py │ ├── qmix │ ├── README.md │ ├── env_wrapper.py │ ├── images │ │ └── torch-qmix-result.png │ 
├── qmix_agent.py │ ├── qmix_config.py │ ├── qmixer_model.py │ ├── replay_buffer.py │ ├── rnn_model.py │ ├── train.py │ └── utils.py │ ├── sac │ ├── README.md │ ├── mujoco_agent.py │ ├── mujoco_model.py │ └── train.py │ └── td3 │ ├── README.md │ ├── mujoco_agent.py │ ├── mujoco_model.py │ └── train.py ├── docs ├── EvoKit │ ├── minimal_example.rst │ ├── online_example.rst │ └── overview.rst ├── Makefile ├── _static │ ├── build_toc_group.js │ └── pygments.css ├── api_docs │ ├── index.rst │ └── utils.rst ├── apis │ ├── agent.rst │ ├── algorithm.rst │ ├── connect.rst │ ├── model.rst │ └── remote_class.rst ├── basic_structure │ ├── agent.rst │ ├── algorithm.rst │ ├── model.rst │ └── overview.rst ├── conf.py ├── features.rst ├── images │ ├── PARL-logo-1.png │ ├── PARL-logo-2.png │ ├── bar.png │ └── quickstart.png ├── implementations │ ├── a2c.rst │ ├── a3c.rst │ ├── ddpg.rst │ ├── ddqn.rst │ ├── dqn.rst │ ├── impala.rst │ ├── maddpg.rst │ ├── new_alg.rst │ ├── oac.rst │ ├── pg.rst │ ├── ppo.rst │ ├── qmix.rst │ ├── sac.rst │ └── td3.rst ├── index.rst ├── installation.rst ├── installation_guide.md ├── installation_guide_cn.md ├── locale │ └── zh_CN │ │ └── LC_MESSAGES │ │ ├── EvoKit.po │ │ ├── api_docs.po │ │ ├── apis.po │ │ ├── basic_structure.po │ │ ├── features.po │ │ ├── implementations.po │ │ ├── index.po │ │ ├── installation.po │ │ ├── overview.po │ │ ├── parallel_training.po │ │ └── tutorial.po ├── overview │ ├── abstractions.rst │ ├── features.rst │ └── parallelization.rst ├── parallel_training │ ├── comparison.png │ ├── debug.rst │ ├── elapsed_time.jpg │ ├── file_distribution.rst │ ├── gpu_cluster.rst │ ├── overview.rst │ ├── poster.png │ ├── recommended_practice1.rst │ ├── recommended_practice2.rst │ ├── serialization.rst │ └── setup.rst ├── questions │ └── distributed_training.rst ├── requirements.txt ├── test.sh ├── tutorial │ ├── add_histogram.jpg │ ├── add_scalar.jpg │ ├── getting_started.rst │ ├── maa.rst │ ├── output_as_csv.rst │ ├── save_param.rst │ └── tensorboard.rst └── zh_CN │ ├── Overview.md │ ├── tutorial │ ├── csv_logger.md │ ├── module.md │ ├── param.md │ ├── quick_start.md │ └── summary.md │ └── xparl │ ├── .images │ ├── dataset1.png │ ├── dataset2.png │ ├── dataset3.png │ ├── dataset4.png │ └── log_server.png │ ├── debug.md │ ├── distribute_files.md │ ├── example1.md │ ├── example2.md │ ├── introduction.md │ ├── serialize.md │ └── tutorial.md ├── evo_kit ├── CMakeLists.txt ├── DeepES.gif ├── README.md ├── benchmark │ └── cartpole.h ├── cmake │ └── Torch │ │ └── EvoKitConfig.cmake ├── core │ ├── include │ │ └── evo_kit │ │ │ ├── adam_optimizer.h │ │ │ ├── cached_gaussian_sampling.h │ │ │ ├── gaussian_sampling.h │ │ │ ├── optimizer.h │ │ │ ├── optimizer_factory.h │ │ │ ├── sampling_factory.h │ │ │ ├── sampling_method.h │ │ │ ├── sgd_optimizer.h │ │ │ └── utils.h │ ├── proto │ │ └── evo_kit │ │ │ └── evo_kit.proto │ └── src │ │ ├── adam_optimizer.cc │ │ ├── cached_gaussian_sampling.cc │ │ ├── gaussian_sampling.cc │ │ ├── optimizer_factory.cc │ │ ├── sampling_factory.cc │ │ ├── sgd_optimizer.cc │ │ └── utils.cc ├── demo │ ├── cartpole_config.prototxt │ ├── paddle │ │ ├── cartpole_async_solver.cc │ │ ├── cartpole_init_model.zip │ │ ├── cartpole_solver_parallel.cc │ │ └── gen_cartpole_init_model.py │ └── torch │ │ ├── CMakeLists.txt │ │ ├── cartpole_solver_parallel.cc │ │ └── model.h ├── paddle │ ├── include │ │ └── evo_kit │ │ │ ├── async_es_agent.h │ │ │ └── es_agent.h │ └── src │ │ ├── async_es_agent.cc │ │ └── es_agent.cc ├── scripts │ ├── build.sh │ └── lib_install.sh 
├── test │ ├── CMakeLists.txt │ ├── include │ │ └── torch_demo_model.h │ ├── prototxt │ │ ├── torch_sin_cached_config.prototxt │ │ └── torch_sin_config.prototxt │ ├── run_test.sh │ ├── src │ │ ├── optimizers_test.cc │ │ ├── sampling_test.cc │ │ ├── torch_agent_test.cc │ │ └── utils_test.cc │ └── unit_test.cc └── torch │ └── include │ └── evo_kit │ └── es_agent.h ├── examples ├── A2C │ ├── .result │ │ ├── result_a2c_paddle0.png │ │ └── result_a2c_paddle1.png │ ├── README.md │ ├── a2c_config.py │ ├── actor.py │ ├── atari_agent.py │ ├── atari_model.py │ ├── requirements.txt │ └── train.py ├── AlphaZero │ ├── .pic │ │ ├── good_moves_rate.png │ │ └── perfect_moves_rate.png │ ├── Arena.py │ ├── Coach.py │ ├── MCTS.py │ ├── README.md │ ├── actor.py │ ├── alphazero_agent.py │ ├── connect4_aiplayer.py │ ├── connect4_game.py │ ├── connect4_model.py │ ├── main.py │ ├── requirements.txt │ └── utils.py ├── Baselines │ ├── GridDispatch_competition │ │ ├── README.md │ │ ├── paddle │ │ │ ├── README.md │ │ │ ├── env_wrapper.py │ │ │ ├── grid_agent.py │ │ │ ├── grid_model.py │ │ │ └── train.py │ │ └── torch │ │ │ ├── README.md │ │ │ ├── env_wrapper.py │ │ │ ├── grid_agent.py │ │ │ ├── grid_model.py │ │ │ └── train.py │ └── Halite_competition │ │ ├── paddle │ │ ├── README.md │ │ ├── config.py │ │ ├── encode_model.py │ │ ├── model │ │ │ └── latest_ship_model.pth │ │ ├── requirements.txt │ │ ├── rl_trainer │ │ │ ├── agent.py │ │ │ ├── algorithm.py │ │ │ ├── controller.py │ │ │ ├── model.py │ │ │ ├── obs_parser.py │ │ │ ├── policy.py │ │ │ ├── replay_memory.py │ │ │ └── utils.py │ │ ├── submission.py │ │ ├── test.ipynb │ │ ├── test.py │ │ └── train.py │ │ └── torch │ │ ├── README.md │ │ ├── config.py │ │ ├── encode_model.py │ │ ├── model │ │ └── latest_ship_model.pth │ │ ├── requirements.txt │ │ ├── rl_trainer │ │ ├── agent.py │ │ ├── algorithm.py │ │ ├── controller.py │ │ ├── model.py │ │ ├── obs_parser.py │ │ ├── policy.py │ │ ├── replay_memory.py │ │ └── utils.py │ │ ├── submission.py │ │ ├── test.ipynb │ │ ├── test.py │ │ └── train.py ├── CARLA_SAC │ ├── .benchmark │ │ ├── Lane_bend.gif │ │ └── carla_sac.png │ ├── README.md │ ├── carla_agent.py │ ├── carla_model.py │ ├── env_config.py │ ├── env_utils.py │ ├── evaluate.py │ ├── model.ckpt │ └── train.py ├── CQL │ ├── README.md │ ├── mujoco_agent.py │ ├── mujoco_model.py │ ├── requirements.txt │ └── train.py ├── DDPG │ ├── README.md │ ├── mujoco_agent.py │ ├── mujoco_model.py │ ├── requirements.txt │ └── train.py ├── DQN │ ├── README.md │ ├── cartpole.jpg │ ├── cartpole_agent.py │ ├── cartpole_model.py │ ├── requirements.txt │ └── train.py ├── DQN_variant │ ├── .benchmark │ │ └── Dueling DQN.png │ ├── README.md │ ├── atari_agent.py │ ├── atari_model.py │ ├── replay_memory.py │ ├── requirements.txt │ └── train.py ├── ES │ ├── README.md │ ├── actor.py │ ├── es.py │ ├── es_config.py │ ├── mujoco_agent.py │ ├── mujoco_model.py │ ├── noise.py │ ├── obs_filter.py │ ├── optimizers.py │ ├── requirements.txt │ ├── train.py │ └── utils.py ├── IMPALA │ ├── README.md │ ├── actor.py │ ├── atari_agent.py │ ├── atari_model.py │ ├── impala_config.py │ ├── requirements.txt │ └── train.py ├── MADDPG │ ├── README.md │ ├── requirements.txt │ ├── simple_agent.py │ ├── simple_model.py │ └── train.py ├── NeurIPS2018-AI-for-Prosthetics-Challenge │ ├── README.md │ ├── args.py │ ├── env_wrapper.py │ ├── final_submit │ │ ├── env_wrapper.py │ │ ├── mlp_model.py │ │ ├── pelvisBasedObs_scaler.npz │ │ ├── submit_model.py │ │ └── test.py │ ├── image │ │ ├── competition.png │ │ ├── 
curriculum-learning.png │ │ ├── demo.gif │ │ ├── fastest.png │ │ ├── last course.png │ │ └── velocity_distribution.png │ ├── multi_head_ddpg.py │ ├── opensim_agent.py │ ├── opensim_model.py │ ├── pelvisBasedObs_scaler.npz │ ├── replay_memory.py │ ├── simulator_client.py │ ├── simulator_pb2.py │ ├── simulator_pb2_grpc.py │ ├── simulator_server.py │ ├── test.py │ └── utils.py ├── NeurIPS2019-Learn-to-Move-Challenge │ ├── README.md │ ├── actor.py │ ├── env_wrapper.py │ ├── evaluate.py │ ├── evaluate_args.py │ ├── final_submit │ │ ├── env_wrapper.py │ │ ├── mlp_model.py │ │ ├── official_obs_scaler.npz │ │ ├── submit_model.py │ │ └── test.py │ ├── image │ │ └── performance.gif │ ├── official_obs_scaler.npz │ ├── opensim_agent.py │ ├── opensim_model.py │ ├── replay_memory.py │ ├── scripts │ │ ├── eval_difficulty1.sh │ │ ├── eval_difficulty2.sh │ │ ├── eval_difficulty3.sh │ │ ├── eval_difficulty3_first_target.sh │ │ ├── train_difficulty1.sh │ │ ├── train_difficulty2.sh │ │ ├── train_difficulty3.sh │ │ └── train_difficulty3_first_target.sh │ ├── train.py │ └── train_args.py ├── NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge │ ├── README.md │ ├── images │ │ └── l2rpn.jpeg │ ├── track1 │ │ ├── README.md │ │ ├── agent.py │ │ ├── es.py │ │ ├── es_agent.py │ │ ├── evaluate.py │ │ ├── powernet_model.py │ │ └── utils.py │ └── track2 │ │ ├── README.md │ │ ├── agent.py │ │ ├── es.py │ │ ├── es_agent.py │ │ ├── evaluate.py │ │ ├── powernet_model.py │ │ └── utils.py ├── OAC │ ├── README.md │ ├── mujoco_agent.py │ ├── mujoco_model.py │ ├── requirements.txt │ └── train.py ├── PPO │ ├── README.md │ ├── agent.py │ ├── atari_config.py │ ├── atari_model.py │ ├── env_utils.py │ ├── mujoco_config.py │ ├── mujoco_model.py │ ├── requirements_atari.txt │ ├── requirements_mujoco.txt │ ├── storage.py │ └── train.py ├── QMIX │ ├── README.md │ ├── env_wrapper.py │ ├── images │ │ └── paddle2.0_qmix_result.png │ ├── qmix_agent.py │ ├── qmix_config.py │ ├── qmixer_model.py │ ├── replay_buffer.py │ ├── requirements.txt │ ├── rnn_model.py │ ├── train.py │ └── utils.py ├── QuickStart │ ├── README.md │ ├── cartpole_agent.py │ ├── cartpole_model.py │ ├── performance.gif │ ├── requirements.txt │ └── train.py ├── SAC │ ├── README.md │ ├── mujoco_agent.py │ ├── mujoco_model.py │ ├── requirements.txt │ └── train.py ├── TD3 │ ├── README.md │ ├── mujoco_agent.py │ ├── mujoco_model.py │ ├── requirements.txt │ └── train.py ├── others │ └── deepes.py └── tutorials │ ├── README.md │ ├── homework │ ├── lesson2 │ │ ├── q_learning_frozenlake │ │ │ ├── agent.py │ │ │ ├── gridworld.py │ │ │ └── train.py │ │ └── sarsa_frozenlake │ │ │ ├── agent.py │ │ │ ├── gridworld.py │ │ │ └── train.py │ ├── lesson3 │ │ └── dqn_mountaincar │ │ │ ├── agent.py │ │ │ ├── model.py │ │ │ ├── replay_memory.py │ │ │ └── train.py │ ├── lesson4 │ │ └── policy_gradient_pong │ │ │ ├── agent.py │ │ │ ├── model.py │ │ │ └── train.py │ └── lesson5 │ │ └── ddpg_quadrotor │ │ ├── quadrotor_agent.py │ │ ├── quadrotor_model.py │ │ └── train.py │ ├── lesson1 │ └── gridworld.py │ ├── lesson2 │ ├── q_learning │ │ ├── agent.py │ │ ├── gridworld.py │ │ └── train.py │ └── sarsa │ │ ├── agent.py │ │ ├── gridworld.py │ │ └── train.py │ ├── lesson3 │ └── dqn │ │ ├── agent.py │ │ ├── algorithm.py │ │ ├── model.py │ │ ├── replay_memory.py │ │ └── train.py │ ├── lesson4 │ └── policy_gradient │ │ ├── agent.py │ │ ├── algorithm.py │ │ ├── model.py │ │ └── train.py │ ├── lesson5 │ └── ddpg │ │ ├── agent.py │ │ ├── algorithm.py │ │ ├── env.py │ │ ├── model.py │ │ ├── replay_memory.py │ 
│ └── train.py │ ├── parl2_dygraph │ ├── README.md │ ├── lesson3 │ │ ├── dqn │ │ │ ├── agent.py │ │ │ ├── algorithm.py │ │ │ ├── model.py │ │ │ ├── replay_memory.py │ │ │ └── train.py │ │ └── homework │ │ │ └── dqn_mountaincar │ │ │ ├── agent.py │ │ │ ├── model.py │ │ │ ├── replay_memory.py │ │ │ └── train.py │ ├── lesson4 │ │ ├── homework │ │ │ └── policy_gradient_pong │ │ │ │ ├── agent.py │ │ │ │ ├── model.py │ │ │ │ └── train.py │ │ └── policy_gradient │ │ │ ├── agent.py │ │ │ ├── algorithm.py │ │ │ ├── model.py │ │ │ └── train.py │ ├── lesson5 │ │ ├── ddpg │ │ │ ├── agent.py │ │ │ ├── algorithm.py │ │ │ ├── env.py │ │ │ ├── model.py │ │ │ ├── replay_memory.py │ │ │ └── train.py │ │ └── homework │ │ │ └── ddpg_quadrotor │ │ │ ├── quadrotor_agent.py │ │ │ ├── quadrotor_model.py │ │ │ └── train.py │ └── requirements.txt │ └── requirements.txt ├── papers ├── AAAI_2020.md ├── DeepMind2020.md ├── ICLR_2020.md ├── ICLR_2021.md ├── NeurIPS 2019 RL papers.numbers ├── NeurIPS_2020.md └── archive.md ├── parl ├── __init__.py ├── algorithms │ ├── __init__.py │ ├── fluid │ │ ├── __init__.py │ │ ├── a3c.py │ │ ├── ddpg.py │ │ ├── ddqn.py │ │ ├── dqn.py │ │ ├── impala │ │ │ ├── __init__.py │ │ │ ├── impala.py │ │ │ ├── tests │ │ │ │ └── vtrace_test_fluid.py │ │ │ └── vtrace.py │ │ ├── maddpg.py │ │ ├── policy_gradient.py │ │ ├── ppo.py │ │ ├── qmix.py │ │ ├── sac.py │ │ ├── td3.py │ │ └── tests │ │ │ └── algs_test_fluid.py │ ├── paddle │ │ ├── __init__.py │ │ ├── a2c.py │ │ ├── cql.py │ │ ├── ddpg.py │ │ ├── ddqn.py │ │ ├── dqn.py │ │ ├── impala │ │ │ ├── __init__.py │ │ │ ├── impala.py │ │ │ ├── tests │ │ │ │ └── vtrace_test_paddle.py │ │ │ └── vtrace.py │ │ ├── maddpg.py │ │ ├── oac.py │ │ ├── policy_gradient.py │ │ ├── ppo.py │ │ ├── qmix.py │ │ ├── sac.py │ │ └── td3.py │ └── torch │ │ ├── __init__.py │ │ ├── a2c.py │ │ ├── coma.py │ │ ├── cql.py │ │ ├── ddpg.py │ │ ├── ddqn.py │ │ ├── dqn.py │ │ ├── dt.py │ │ ├── iql.py │ │ ├── maddpg.py │ │ ├── mappo.py │ │ ├── oac.py │ │ ├── policy_gradient.py │ │ ├── ppo.py │ │ ├── qmix.py │ │ ├── sac.py │ │ └── td3.py ├── core │ ├── __init__.py │ ├── agent_base.py │ ├── algorithm_base.py │ ├── fluid │ │ ├── __init__.py │ │ ├── agent.py │ │ ├── algorithm.py │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── attr_holder.py │ │ │ ├── layer_wrappers.py │ │ │ └── tests │ │ │ │ ├── param_name_test_fluid.py │ │ │ │ └── param_sharing_test_fluid.py │ │ ├── model.py │ │ ├── model_helper.py │ │ ├── plutils │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ └── compiler.py │ │ ├── policy_distribution.py │ │ └── tests │ │ │ ├── agent_base_test_fluid.py │ │ │ ├── agent_model_ids_test_fluid.py │ │ │ ├── algorithm_base_test_fluid.py │ │ │ ├── fluid_gpu_actor_test_fluid.py │ │ │ ├── gru_cell_test_fluid.py │ │ │ ├── model_base_test_fluid.py │ │ │ ├── model_helper_test_fluid.py │ │ │ └── policy_distribution_test_fluid.py │ ├── model_base.py │ ├── paddle │ │ ├── __init__.py │ │ ├── agent.py │ │ ├── algorithm.py │ │ ├── model.py │ │ ├── policy_distribution.py │ │ └── tests │ │ │ ├── agent_base_actor_critic_test_paddle.py │ │ │ ├── agent_base_test_paddle.py │ │ │ ├── model_base_actor_critic_test_paddle.py │ │ │ ├── model_base_test_paddle.py │ │ │ └── policy_distribution_test_paddle.py │ ├── tests │ │ ├── agent_base_test.py │ │ ├── algorithm_base_test.py │ │ └── model_base_test.py │ └── torch │ │ ├── __init__.py │ │ ├── agent.py │ │ ├── algorithm.py │ │ ├── model.py │ │ ├── policy_distribution.py │ │ └── tests │ │ ├── agent_base_actor_critic_test_torch.py │ │ ├── agent_base_test_torch.py 
│ │ ├── model_base_test_torch.py │ │ ├── model_based_actor_critic_test_torch.py │ │ └── policy_distribution_test_torch.py ├── env │ ├── __init__.py │ ├── atari_wrappers.py │ ├── compat_wrappers.py │ ├── continuous_wrappers.py │ ├── mujoco_wrappers.py │ ├── multiagent_env.py │ ├── multiagent_simple_env.py │ ├── tests │ │ └── continuous_wrappers_test.py │ └── vector_env.py ├── remote │ ├── __init__.py │ ├── client.py │ ├── cluster_monitor.py │ ├── communication.py │ ├── compatible_trick.py │ ├── cpu_resource.py │ ├── exceptions.py │ ├── future_mode │ │ ├── __init__.py │ │ ├── future_object.py │ │ ├── proxy_wrapper_nowait.py │ │ └── tests │ │ │ ├── future_mode_cluster_2_test.py │ │ │ ├── future_mode_cluster_test.py │ │ │ ├── future_mode_get_set_attribute_2_test.py │ │ │ ├── future_mode_get_set_attribute_3_test.py │ │ │ ├── future_mode_get_set_attribute_test.py │ │ │ ├── future_object_test.py │ │ │ ├── gpu │ │ │ └── future_mode_cluster_test.py │ │ │ └── proxy_wrapper_nowait_test.py │ ├── gpu_resource.py │ ├── grpc_heartbeat │ │ ├── __init__.py │ │ ├── heartbeat.proto │ │ ├── heartbeat_client.py │ │ ├── heartbeat_pb2.py │ │ ├── heartbeat_pb2_grpc.py │ │ ├── heartbeat_server.py │ │ └── tests │ │ │ ├── heartbeat_client_arguments_test.py │ │ │ ├── heartbeat_server_arguments_test.py │ │ │ └── heartbeat_test.py │ ├── job.py │ ├── log_server.py │ ├── master.py │ ├── message.py │ ├── monitor.py │ ├── proxy_wrapper.py │ ├── remote_class_serialization.py │ ├── remote_constants.py │ ├── remote_decorator.py │ ├── remote_wrapper.py │ ├── scripts.py │ ├── start.py │ ├── static │ │ ├── css │ │ │ └── bootstrap-parl.min.css │ │ ├── favicon.ico │ │ ├── js │ │ │ ├── ansi_up.js │ │ │ ├── bootstrap-table.min.js │ │ │ ├── echarts.min.js │ │ │ ├── gpu-parl.js │ │ │ ├── jquery.ajax-cross-origin.min.js │ │ │ ├── jquery.min.js │ │ │ └── parl.js │ │ └── logo.png │ ├── status.py │ ├── templates │ │ ├── clients.html │ │ ├── gpu-workers.html │ │ ├── jobs.html │ │ └── workers.html │ ├── test_utils.py │ ├── tests │ │ ├── actor_max_memory_test.py │ │ ├── actor_status_test.py │ │ ├── actor_status_wait_mode_test.py │ │ ├── client_not_init_test.py │ │ ├── cluster_2_test.py │ │ ├── cluster_3_test.py │ │ ├── cluster_monitor_2_test.py │ │ ├── cluster_monitor_3_test.py │ │ ├── cluster_monitor_test.py │ │ ├── cluster_notebook_2_test.py │ │ ├── cluster_notebook_test.py │ │ ├── cluster_status_test.py │ │ ├── cluster_test.py │ │ ├── communication_test.py │ │ ├── get_set_attribute_2_test.py │ │ ├── get_set_attribute_3_test.py │ │ ├── get_set_attribute_notebook_test.py │ │ ├── get_set_attribute_test.py │ │ ├── gpu │ │ │ ├── cluster_test.py │ │ │ ├── proxy_wrapper_test.py │ │ │ ├── remote_class_test.py │ │ │ ├── worker_manager_test.py │ │ │ └── worker_test.py │ │ ├── locate_remote_file_test_alone.py │ │ ├── log_server_test.py │ │ ├── mocking_env_test.py │ │ ├── multiprocessing │ │ │ ├── cluster_multiprocessing_1_test.py │ │ │ └── cluster_multiprocessing_2_test.py │ │ ├── proxy_wrapper_test.py │ │ ├── recursive_actor_test.py │ │ ├── remote_class_test.py │ │ ├── reset_job_test.py │ │ ├── reset_job_test_alone.py │ │ ├── rom │ │ │ └── pong.bin │ │ ├── send_job_test.py │ │ ├── simulate_client.py │ │ ├── support_RegExp_test.py │ │ ├── sync_config_file_test.py │ │ ├── test_import_module │ │ │ ├── Module2.py │ │ │ ├── main_abs_test.py │ │ │ ├── main_test_alone.py │ │ │ └── subdir │ │ │ │ ├── Module.py │ │ │ │ └── __init__.py │ │ ├── utils_test.py │ │ ├── worker_manager_test.py │ │ └── worker_test.py │ ├── utils.py │ ├── worker.py │ ├── 
worker_manager.py │ └── zmq_utils.py ├── tests │ ├── gym.py │ └── import_test.py └── utils │ ├── __init__.py │ ├── csv_logger.py │ ├── deprecation.py │ ├── exceptions.py │ ├── globvars.py │ ├── logger.py │ ├── machine_info.py │ ├── np_utils.py │ ├── path_utils.py │ ├── replay_memory.py │ ├── rl_utils.py │ ├── scheduler.py │ ├── summary.py │ ├── tensorboard.py │ ├── test_utils.py │ ├── tests │ ├── csv_logger_test.py │ ├── globvar_test.py │ ├── logger_test.py │ ├── not_import_dl_framework_test.py │ ├── not_import_dl_framework_test_torch.py │ ├── scheduler_test.py │ └── summary_test.py │ ├── time_stat.py │ ├── utils.py │ ├── visualdl.py │ └── window_stat.py ├── setup.py └── test_tipc ├── common_func.sh ├── configs ├── A2C │ └── train_infer_python.txt ├── CQL │ └── train_infer_python.txt ├── DDPG │ └── train_infer_python.txt ├── DQN │ └── train_infer_python.txt ├── DQN_variant │ └── train_infer_python.txt ├── ES │ └── train_infer_python.txt ├── MADDPG │ └── train_infer_python.txt ├── OAC │ └── train_infer_python.txt ├── PPO │ └── train_infer_python.txt ├── QuickStart │ └── train_infer_python.txt ├── SAC │ └── train_infer_python.txt └── TD3 │ └── train_infer_python.txt ├── docs ├── test_tipc_log.png └── test_train_inference_python.md ├── prepare.sh ├── readme.md └── test_train_inference_python.sh /.github/Aircraft.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/Aircraft.gif -------------------------------------------------------------------------------- /.github/Breakout.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/Breakout.gif -------------------------------------------------------------------------------- /.github/Half-Cheetah.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/Half-Cheetah.gif -------------------------------------------------------------------------------- /.github/PARL-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/PARL-logo.png -------------------------------------------------------------------------------- /.github/abstractions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/abstractions.png -------------------------------------------------------------------------------- /.github/decorator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/.github/decorator.png -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/Lucas-C/pre-commit-hooks.git 3 | sha: v1.0.1 4 | hooks: 5 | - id: remove-crlf 6 | files: (?!.*third_party)^.*$ | (?!.*book)^.*$ 7 | - repo: https://github.com/pre-commit/mirrors-yapf.git 8 | sha: v0.24.0 9 | hooks: 10 | - id: yapf 11 | language_version: python3.7 12 | args: 
['--style={column_limit:120}' ] 13 | files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ 14 | - repo: https://github.com/pre-commit/pre-commit-hooks 15 | sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0 16 | hooks: 17 | - id: check-added-large-files 18 | - id: check-merge-conflict 19 | - id: check-symlinks 20 | - id: detect-private-key 21 | files: (?!.*third_party)^.*$ | (?!.*book)^.*$ 22 | - id: end-of-file-fixer 23 | - repo: local 24 | hooks: 25 | - id: copyright_checker 26 | name: copyright_checker 27 | entry: python ./.copyright.hook 28 | language: system 29 | files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ 30 | exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$ 31 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | formats: [] 3 | sphinx: 4 | configuration: docs/conf.py 5 | python: 6 | version: 3.8 7 | install: 8 | - requirements: docs/requirements.txt 9 | - method: setuptools 10 | path: . 11 | -------------------------------------------------------------------------------- /.teamcity/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ### [2019-07-09] 1.1-cuda9.0-cudnn7-docs: 2 | add unit test for compiling the docs 3 | - add an environment `docs` in anaconda 4 | 5 | ### [2020-07-16] cuda9.0-cudnn7-v3: 6 | add python3.8 env 7 | 8 | ### [2022-11-22] 2.1-cuda10.1-v0: 9 | add python3.9 env, remove python2.7 env 10 | 11 | ### [2022-11-23] 2.1-cuda10.1-v0.1: 12 | add paddlepaddle_gpu-2.3.1-cp39-cp39-manylinux1_x86_64.whl 13 | 14 | ### [2022-11-25] 2.1-cuda10.1-v0.2: 15 | add test_example env 16 | -------------------------------------------------------------------------------- /.teamcity/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | # A dev image based on paddle production image 17 | 18 | FROM parl/parl-test:2.1-cuda10.1-v0.2 19 | 20 | RUN apt-get update && apt-get install -y libgflags-dev libgoogle-glog-dev libomp-dev unzip 21 | RUN apt-get update && apt-get install -y libgtest-dev && cd /usr/src/gtest && mkdir build \ 22 | && cd build && cmake .. 
&& make && cp libgtest*.a /usr/local/lib 23 | 24 | EXPOSE 22 25 | -------------------------------------------------------------------------------- /.teamcity/requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements for unittest 2 | rarfile==3.1 3 | opencv-python<=4.3.0.34;python_version>="3" 4 | opencv-python==4.2.0.32;python_version<"3" 5 | gym 6 | details 7 | parameterized 8 | -------------------------------------------------------------------------------- /.teamcity/requirements_fluid.txt: -------------------------------------------------------------------------------- 1 | # requirements for paddle 1.8.5 unittest 2 | gym 3 | details 4 | parameterized 5 | paddlepaddle-gpu==1.8.5.post97 6 | -------------------------------------------------------------------------------- /.teamcity/requirements_torch.txt: -------------------------------------------------------------------------------- 1 | # requirements for torch unittest 2 | gym 3 | details 4 | parameterized 5 | -------------------------------------------------------------------------------- /.teamcity/windows_requirements_fluid.txt: -------------------------------------------------------------------------------- 1 | # requirements for windows unittest 2 | gym 3 | details 4 | parameterized 5 | paddlepaddle==1.8.5 6 | -------------------------------------------------------------------------------- /.teamcity/windows_requirements_paddle.txt: -------------------------------------------------------------------------------- 1 | # requirements for paddle 2.0 unittest 2 | gym 3 | details 4 | parameterized 5 | paddlepaddle>=2.0.0 6 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include parl/remote/static/logo.png 2 | include parl/remote/static/favicon.ico 3 | recursive-include parl/remote/templates *.html 4 | recursive-include parl/remote/static/css *.css 5 | recursive-include parl/remote/static/js *.js 6 | -------------------------------------------------------------------------------- /benchmark/fluid/A2C/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/A2C/learning_curve.png -------------------------------------------------------------------------------- /benchmark/fluid/A2C/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/A2C/result.png -------------------------------------------------------------------------------- /benchmark/fluid/DDPG/.benchmark/DDPG_HalfCheetah-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DDPG/.benchmark/DDPG_HalfCheetah-v2.png -------------------------------------------------------------------------------- /benchmark/fluid/DDPG/.benchmark/DDPG_Hopper-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DDPG/.benchmark/DDPG_Hopper-v2.png -------------------------------------------------------------------------------- 
/benchmark/fluid/DDPG/.benchmark/DDPG_Humanoid-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DDPG/.benchmark/DDPG_Humanoid-v2.png -------------------------------------------------------------------------------- /benchmark/fluid/DDPG/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce DDPG with PARL 2 | Based on PARL, we have reproduced the DDPG deep reinforcement learning algorithm, reaching the same level of performance as the original paper on Mujoco benchmarks. 3 | 4 | > Paper: DDPG in [Continuous control with deep reinforcement learning](https://arxiv.org/abs/1509.02971) 5 | 6 | ### Mujoco games introduction 7 | Please see [here](https://github.com/openai/mujoco-py) to know more about Mujoco games. 8 | 9 | ### Benchmark result 10 | 11 | <img src=".benchmark/DDPG_HalfCheetah-v2.png" width = "400" height ="300" alt="DDPG_HalfCheetah-v2"/> <img src=".benchmark/DDPG_Humanoid-v2.png" width = "400" height ="300" alt="DDPG_Humanoid-v2"/> 12 | <img src=".benchmark/DDPG_Hopper-v2.png" width = "400" height ="300" alt="DDPG_Hopper-v2"/> 13 | 14 | ## How to use 15 | ### Dependencies: 16 | + python3.5+ 17 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle) 18 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL) 19 | + gym 20 | + tqdm 21 | + mujoco-py>=1.50.1.0 22 | 23 | ### Start Training: 24 | ``` 25 | # To train an agent for HalfCheetah-v2 game 26 | python train.py 27 | 28 | # To train on another game 29 | # python train.py --env [ENV_NAME] 30 | ``` -------------------------------------------------------------------------------- /benchmark/fluid/DQN/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce DQN with PARL 2 | Based on PARL, we provide a simple demonstration of DQN. 3 | 4 | + Paper: DQN in [Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html) 5 | 6 | ### Result 7 | 8 | Performance of DQN playing CartPole-v0 9 | 10 | <p align="left"> 11 | <img src="../QuickStart/performance.gif" alt="result" height="175"/> 12 | <img src="cartpole.jpg" alt="result" height="175"/> 13 | </p> 14 | 15 | ## How to use 16 | ### Dependencies: 17 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle) 18 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL) 19 | + gym 20 | + tqdm 21 | 22 | 23 | ### Start Training: 24 | ``` 25 | # To train a model for CartPole-v0 game 26 | python train.py 27 | ``` 28 | 29 | ## DQN-Variants 30 | 31 | For DQN variants such as Double DQN and Dueling DQN, please check [here](https://github.com/PaddlePaddle/PARL/tree/develop/benchmark/fluid/DQN_variant) 32 | -------------------------------------------------------------------------------- /benchmark/fluid/DQN/cartpole.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN/cartpole.jpg -------------------------------------------------------------------------------- /benchmark/fluid/DQN/cartpole_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import paddle.fluid as fluid 16 | import parl 17 | from parl import layers 18 | 19 | 20 | class CartpoleModel(parl.Model): 21 | def __init__(self, act_dim): 22 | hid1_size = 128 23 | hid2_size = 128 24 | self.fc1 = layers.fc(size=hid1_size, act='relu') 25 | self.fc2 = layers.fc(size=hid2_size, act='relu') 26 | self.fc3 = layers.fc(size=act_dim, act=None) 27 | 28 | def value(self, obs): 29 | h1 = self.fc1(obs) 30 | h2 = self.fc2(h1) 31 | Q = self.fc3(h2) 32 | return Q 33 | -------------------------------------------------------------------------------- /benchmark/fluid/DQN/train_on_xpu.md: -------------------------------------------------------------------------------- 1 | ## Running DQN on XPU 2 | We provide a simple demonstration of running DQN on XPU. 3 | 4 | + Paper: DQN in [Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html) 5 | 6 | ### Result 7 | 8 | Performance of DQN playing CartPole-v0 9 | 10 | <p align="left"> 11 | <img src="../QuickStart/performance.gif" alt="result" height="175"/> 12 | <img src="cartpole.jpg" alt="result" height="175"/> 13 | </p> 14 | 15 | ## How to use 16 | ### Dependencies: 17 | + [paddlepaddle>=2.0](https://github.com/PaddlePaddle/Paddle) 18 | + [parl](https://github.com/PaddlePaddle/PARL) 19 | + gym 20 | + tqdm 21 | 22 | 23 | ### Using XPU 24 | To use an XPU, set the environment variable FLAGS_selected_xpus to {your xpu card index}. 
25 | ``` 26 | export FLAGS_selected_xpus=0 27 | ``` 28 | 29 | ### Start Training: 30 | ``` 31 | # To train a model for CartPole-v0 game 32 | python train_with_xpu.py 33 | ``` 34 | -------------------------------------------------------------------------------- /benchmark/fluid/DQN_variant/.benchmark/merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN_variant/.benchmark/merge.png -------------------------------------------------------------------------------- /benchmark/fluid/DQN_variant/.benchmark/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN_variant/.benchmark/table.png -------------------------------------------------------------------------------- /benchmark/fluid/DQN_variant/rom_files/battle_zone.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN_variant/rom_files/battle_zone.bin -------------------------------------------------------------------------------- /benchmark/fluid/DQN_variant/rom_files/breakout.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN_variant/rom_files/breakout.bin -------------------------------------------------------------------------------- /benchmark/fluid/DQN_variant/rom_files/pong.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/DQN_variant/rom_files/pong.bin -------------------------------------------------------------------------------- /benchmark/fluid/DQN_variant/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import cv2 16 | from atari import AtariPlayer 17 | from atari_wrapper import FrameStack, MapState, FireResetEnv 18 | 19 | 20 | def get_player(rom, 21 | image_size, 22 | viz=False, 23 | train=False, 24 | frame_skip=1, 25 | context_len=1): 26 | env = AtariPlayer( 27 | rom, 28 | frame_skip=frame_skip, 29 | viz=viz, 30 | live_lost_as_eoe=train, 31 | max_num_frames=60000) 32 | env = FireResetEnv(env) 33 | env = MapState(env, lambda im: cv2.resize(im, image_size)) 34 | if not train: 35 | # in training, context is taken care of in expreplay buffer 36 | env = FrameStack(env, context_len) 37 | return env 38 | -------------------------------------------------------------------------------- /benchmark/fluid/ES/es.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import parl 16 | 17 | __all__ = ['ES'] 18 | 19 | 20 | class ES(parl.Algorithm): 21 | def __init__(self, model): 22 | """ES algorithm. 23 | 24 | Since the parameters of the model are updated at the numpy level, a `learn` function is not needed 25 | in this algorithm. 26 | 27 | Args: 28 | model(`parl.Model`): policy model of the ES algorithm. 29 | """ 30 | self.model = model 31 | 32 | def predict(self, obs): 33 | """Use the policy model to predict actions for observations. 34 | 35 | Args: 36 | obs(layers.data): data layer of observations. 37 | 38 | Returns: 39 | tensor of predicted actions. 40 | """ 41 | return self.model(obs) 42 | -------------------------------------------------------------------------------- /benchmark/fluid/ES/es_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | config = { 16 | #========== remote config ========== 17 | 'master_address': 'localhost:8037', 18 | 19 | #========== env config ========== 20 | 'env_name': 'Humanoid-v1', 21 | 22 | #========== actor config ========== 23 | 'actor_num': 96, 24 | 'action_noise_std': 0.01, 25 | 'min_task_runtime': 0.2, 26 | 'eval_prob': 0.003, 27 | 'filter_update_prob': 0.01, 28 | 29 | #========== learner config ========== 30 | 'stepsize': 0.01, 31 | 'min_episodes_per_batch': 1000, 32 | 'min_steps_per_batch': 10000, 33 | 'noise_size': 200000000, 34 | 'noise_stdev': 0.02, 35 | 'l2_coeff': 0.005, 36 | 'report_window_size': 10, 37 | } 38 | -------------------------------------------------------------------------------- /benchmark/fluid/ES/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/ES/learning_curve.png -------------------------------------------------------------------------------- /benchmark/fluid/ES/mujoco_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import paddle.fluid as fluid 16 | import parl 17 | from parl import layers 18 | 19 | 20 | class MujocoModel(parl.Model): 21 | def __init__(self, act_dim): 22 | hid1_size = 256 23 | hid2_size = 256 24 | 25 | self.fc1 = layers.fc(size=hid1_size, act='tanh') 26 | self.fc2 = layers.fc(size=hid2_size, act='tanh') 27 | self.fc3 = layers.fc(size=act_dim) 28 | 29 | def forward(self, obs): 30 | hid1 = self.fc1(obs) 31 | hid2 = self.fc2(hid1) 32 | means = self.fc3(hid2) 33 | return means 34 | -------------------------------------------------------------------------------- /benchmark/fluid/ES/noise.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code is copied or modified from: 4 | # https://github.com/ray-project/ray/blob/master/python/ray/rllib/utils/filter.py 5 | 6 | import numpy as np 7 | 8 | 9 | class SharedNoiseTable(object): 10 | """Shared noise table used by learner and actor. 11 | 12 | The learner and actors create the same noise table by passing the same seed. 13 | With the same noise table, the learner and actors can communicate noise by 14 | index into the noise table instead of passing numpy arrays of noise. 
15 | """ 16 | 17 | def __init__(self, noise_size, seed=1024): 18 | self.noise_size = noise_size 19 | self.seed = seed 20 | self.noise = self._create_noise() 21 | 22 | def _create_noise(self): 23 | noise = np.random.RandomState(self.seed).randn(self.noise_size).astype( 24 | np.float32) 25 | return noise 26 | 27 | def get(self, i, dim): 28 | return self.noise[i:i + dim] 29 | 30 | def sample_index(self, dim): 31 | return np.random.randint(0, len(self.noise) - dim + 1) 32 | -------------------------------------------------------------------------------- /benchmark/fluid/GA3C/.benchmark/GA3C_BeamRider.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/GA3C/.benchmark/GA3C_BeamRider.jpg -------------------------------------------------------------------------------- /benchmark/fluid/GA3C/.benchmark/GA3C_Breakout.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/GA3C/.benchmark/GA3C_Breakout.jpg -------------------------------------------------------------------------------- /benchmark/fluid/GA3C/.benchmark/GA3C_Pong.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/GA3C/.benchmark/GA3C_Pong.jpg -------------------------------------------------------------------------------- /benchmark/fluid/GA3C/.benchmark/GA3C_Qbert.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/GA3C/.benchmark/GA3C_Qbert.jpg -------------------------------------------------------------------------------- /benchmark/fluid/GA3C/.benchmark/GA3C_SpaceInvaders.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/GA3C/.benchmark/GA3C_SpaceInvaders.jpg -------------------------------------------------------------------------------- /benchmark/fluid/IMPALA/.benchmark/IMPALA_BeamRider.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/IMPALA/.benchmark/IMPALA_BeamRider.jpg -------------------------------------------------------------------------------- /benchmark/fluid/IMPALA/.benchmark/IMPALA_Breakout.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/IMPALA/.benchmark/IMPALA_Breakout.jpg -------------------------------------------------------------------------------- /benchmark/fluid/IMPALA/.benchmark/IMPALA_Pong.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/IMPALA/.benchmark/IMPALA_Pong.jpg -------------------------------------------------------------------------------- /benchmark/fluid/IMPALA/.benchmark/IMPALA_Qbert.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/IMPALA/.benchmark/IMPALA_Qbert.jpg -------------------------------------------------------------------------------- /benchmark/fluid/IMPALA/.benchmark/IMPALA_SpaceInvaders.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/IMPALA/.benchmark/IMPALA_SpaceInvaders.jpg -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple.gif -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple.png -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_adversary.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_adversary.gif -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_adversary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_adversary.png -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_crypto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_crypto.png -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_push.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_push.gif -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_push.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_push.png -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_reference.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_reference.gif -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_reference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_reference.png -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_speaker_listener.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_speaker_listener.gif -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_speaker_listener.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_speaker_listener.png -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_spread.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_spread.gif -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_spread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_spread.png -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_tag.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_tag.gif -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_tag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_tag.png -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_world_comm.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_world_comm.gif -------------------------------------------------------------------------------- /benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_world_comm.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/MADDPG/.benchmark/MADDPG_simple_world_comm.png -------------------------------------------------------------------------------- /benchmark/fluid/PPO/.benchmark/PPO_HalfCheetah-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/PPO/.benchmark/PPO_HalfCheetah-v2.png -------------------------------------------------------------------------------- /benchmark/fluid/PPO/.benchmark/PPO_Hopper-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/PPO/.benchmark/PPO_Hopper-v2.png -------------------------------------------------------------------------------- /benchmark/fluid/PPO/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce PPO with PARL 2 | Based on PARL, the PPO algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks. 3 | 4 | Includes the following approaches: 5 | + Clipped Surrogate Objective 6 | + Adaptive KL Penalty Coefficient 7 | 8 | > Paper: PPO in [Proximal Policy Optimization Algorithms](https://arxiv.org/abs/1707.06347) 9 | 10 | ### Mujoco games introduction 11 | Please see [here](https://github.com/openai/mujoco-py) to learn more about Mujoco games. 12 | 13 | ### Benchmark result 14 | 15 | <img src=".benchmark/PPO_HalfCheetah-v2.png" width = "400" height ="300" alt="PPO_HalfCheetah-v2" /> <img src=".benchmark/PPO_Hopper-v2.png" width = "400" height ="300" alt="PPO_Hopper-v2" /> 16 | 17 | ## How to use 18 | ### Dependencies: 19 | + python3.5+ 20 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle) 21 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL) 22 | + gym 23 | + tqdm 24 | + mujoco-py>=1.50.1.0 25 | 26 | ### Start Training: 27 | ``` 28 | # To train an agent for the HalfCheetah-v2 game (default: CLIP loss) 29 | python train.py 30 | 31 | # To train on a different game with a different loss type 32 | # python train.py --env [ENV_NAME] --loss_type [CLIP|KLPEN] 33 | ``` -------------------------------------------------------------------------------- /benchmark/fluid/Prioritized_DQN/README.md: -------------------------------------------------------------------------------- 1 | ## Prioritized Experience Replay 2 | Reproducing the paper [Prioritized Experience Replay](https://arxiv.org/abs/1511.05952). 3 | 4 | Prioritized experience replay (PER) develops a framework for prioritizing experience, so as to replay important transitions more frequently. There are two variants of prioritizing transitions, rank-based and proportional. Our implementation is the proportional variant, which performs better, as reported in the original paper.
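Concretely, the proportional variant samples transition i with probability P(i) = p_i^alpha / sum_k p_k^alpha and corrects the resulting bias with importance-sampling weights. The sketch below illustrates the math only; the repository's actual implementation is the sum-tree-based `proportional_per.py`, and the class and method names here are hypothetical:

```python
import numpy as np

class ProportionalSampler(object):
    """Illustrative proportional prioritization: P(i) = p_i**alpha / sum_k p_k**alpha."""

    def __init__(self, capacity, alpha=0.6):
        self.alpha = alpha
        self.priorities = np.zeros(capacity, dtype=np.float32)
        self.pos = 0   # next write position (circular buffer)
        self.size = 0  # number of stored priorities

    def add(self, priority):
        # New transitions typically enter with the current max priority
        # so that they are replayed at least once.
        self.priorities[self.pos] = priority
        self.pos = (self.pos + 1) % len(self.priorities)
        self.size = min(self.size + 1, len(self.priorities))

    def sample(self, batch_size, beta=0.4):
        probs = self.priorities[:self.size] ** self.alpha
        probs /= probs.sum()
        idxs = np.random.choice(self.size, batch_size, p=probs)
        # Importance-sampling weights correct the bias of non-uniform replay;
        # the paper anneals beta towards 1 over the course of training.
        weights = (self.size * probs[idxs]) ** (-beta)
        weights /= weights.max()
        return idxs, weights
```

A sum-tree replaces the O(n) sampling pass above with O(log n) sampling and priority updates, which is why the actual implementation uses one.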
5 | 6 | ## Reproduced Results 7 | Results have been reproduced with [Double DQN](https://arxiv.org/abs/1509.06461v3) on the following three environments: 8 | 9 | <p align="center"> 10 | <img src="result.png"/> 11 | </p> 12 | 13 | ## How to use 14 | 15 | ### Dependencies: 16 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle) 17 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL) 18 | + gym[atari]==0.17.2 19 | + atari-py==0.2.6 20 | + tqdm 21 | + [ale_python_interface](https://github.com/mgbellemare/Arcade-Learning-Environment) 22 | 23 | 24 | ### Start Training: 25 | Train on the BattleZone game: 26 | ```bash 27 | python train.py --rom ./rom_files/battle_zone.bin 28 | ``` 29 | 30 | > To train on more games, you can download more ROM files from [here](https://github.com/openai/atari-py/tree/master/atari_py/atari_roms). 31 | -------------------------------------------------------------------------------- /benchmark/fluid/Prioritized_DQN/atari.py: -------------------------------------------------------------------------------- 1 | ../DQN_variant/atari.py -------------------------------------------------------------------------------- /benchmark/fluid/Prioritized_DQN/atari_wrapper.py: -------------------------------------------------------------------------------- 1 | ../DQN_variant/atari_wrapper.py -------------------------------------------------------------------------------- /benchmark/fluid/Prioritized_DQN/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/Prioritized_DQN/result.png -------------------------------------------------------------------------------- /benchmark/fluid/Prioritized_DQN/rom_files: -------------------------------------------------------------------------------- 1 | ../DQN_variant/rom_files -------------------------------------------------------------------------------- /benchmark/fluid/Prioritized_DQN/utils.py: -------------------------------------------------------------------------------- 1 | ../DQN_variant/utils.py -------------------------------------------------------------------------------- /benchmark/fluid/QMIX/README.md: -------------------------------------------------------------------------------- 1 | ## QMIX based on PARL and PaddlePaddle 2 | We reproduce QMIX based on **PARL** and **PaddlePaddle**, reaching the same level of indicators as the paper in StarCraft2 benchmarks. 3 | ### QMIX 4 | QMIX is a **value-based** multi-agent reinforcement learning algorithm. 5 | Learn more about QMIX from: [QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1803.11485) 6 | ### StarCraft2 Environment 7 | Paper: [The StarCraft Multi-Agent Challenge](https://arxiv.org/pdf/1902.04043) 8 | GitHub repository: [smac](https://github.com/oxwhirl/smac) 9 | ## Benchmark Results 10 | <img src="images/paddle-qmix-result.png" width = "1200" alt="Performance" /> 11 | 12 | - We trained our model in 5 different scenarios: *"3m", "8m", "2s_3z", "3s_5z"* and *"1c_3s_5z"*. 13 | - The **difficulty** in all scenarios is set to "7" (very difficult). 14 | - We trained our model 3 times for each scenario.
15 | 16 | ## How to Use 17 | ### Dependencies 18 | + python3.5+ 19 | + [PaddlePaddle==1.8.5](https://github.com/PaddlePaddle/Paddle) 20 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL) 21 | + [smac](https://github.com/oxwhirl/smac) 22 | 23 | ### Start Training 24 | 1. Modify the config in `qmix_config.py`. 25 | 2. Start training: 26 | ```bash 27 | python train.py 28 | ``` 29 | 3. View the training process with TensorBoard: 30 | ```bash 31 | tensorboard --logdir ./ 32 | ``` 33 | -------------------------------------------------------------------------------- /benchmark/fluid/QMIX/images/paddle-qmix-result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/QMIX/images/paddle-qmix-result.png -------------------------------------------------------------------------------- /benchmark/fluid/QuickStart/README.md: -------------------------------------------------------------------------------- 1 | ## Quick Start 2 | Train an agent with PARL to solve the CartPole problem, a classical benchmark in RL. 3 | 4 | ## How to use 5 | ### Dependencies: 6 | 7 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle) 8 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL) 9 | + gym 10 | 11 | ### Start Training: 12 | ``` 13 | # Install dependencies 14 | pip install paddlepaddle==1.8.5 15 | # Or use CUDA: pip install paddlepaddle-gpu 16 | 17 | pip install gym 18 | git clone https://github.com/PaddlePaddle/PARL.git 19 | cd PARL 20 | pip install . 21 | 22 | # Train model 23 | cd examples/QuickStart/ 24 | python train.py 25 | ``` 26 | 27 | ### Expected Result 28 | <img src="performance.gif" width = "300" height ="200" alt="result"/> 29 | 30 | The agent can get around 200 points in a few minutes. 31 | -------------------------------------------------------------------------------- /benchmark/fluid/QuickStart/cartpole_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | import parl 16 | from parl import layers 17 | 18 | 19 | class CartpoleModel(parl.Model): 20 | def __init__(self, act_dim): 21 | # hidden layer size scales with the action dimension 22 | hid1_size = act_dim * 10 23 | 24 | self.fc1 = layers.fc(size=hid1_size, act='tanh') 25 | self.fc2 = layers.fc(size=act_dim, act='softmax') 26 | 27 | def forward(self, obs): 28 | out = self.fc1(obs) 29 | out = self.fc2(out) 30 | return out 31 | -------------------------------------------------------------------------------- /benchmark/fluid/QuickStart/performance.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/QuickStart/performance.gif -------------------------------------------------------------------------------- /benchmark/fluid/SAC/.benchmark/merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/SAC/.benchmark/merge.png -------------------------------------------------------------------------------- /benchmark/fluid/SAC/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce SAC with PARL 2 | Based on PARL, the SAC algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks. 3 | 4 | Includes the following approaches: 5 | + DDPG Style with Stochastic Policy 6 | + Maximum Entropy 7 | 8 | > Paper: SAC in [Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor](https://arxiv.org/abs/1801.01290) 9 | 10 | ### Mujoco games introduction 11 | Please see [here](https://github.com/openai/mujoco-py) to learn more about Mujoco games. 12 | 13 | ### Benchmark result 14 | 15 | <img src=".benchmark/merge.png" width = "1500" height ="260" alt="Performance" /> 16 | 17 | ## How to use 18 | ### Dependencies: 19 | + python3.5+ 20 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle) 21 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL) 22 | + gym 23 | + mujoco-py>=1.50.1.0 24 | 25 | ### Start Training: 26 | ``` 27 | # To train an agent for the HalfCheetah-v2 game 28 | python train.py 29 | 30 | # To train on different games 31 | # python train.py --env [ENV_NAME] 32 | ``` -------------------------------------------------------------------------------- /benchmark/fluid/TD3/.benchmark/merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/fluid/TD3/.benchmark/merge.png -------------------------------------------------------------------------------- /benchmark/fluid/TD3/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce TD3 with PARL 2 | Based on PARL, the TD3 algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks.
3 | 4 | Includes the following approaches: 5 | + Clipped Double Q-learning 6 | + Target Networks and Delayed Policy Update 7 | + Target Policy Smoothing Regularization 8 | 9 | > Paper: TD3 in [Addressing Function Approximation Error in Actor-Critic Methods](https://arxiv.org/abs/1802.09477) 10 | 11 | ### Mujoco games introduction 12 | Please see [here](https://github.com/openai/mujoco-py) to learn more about Mujoco games. 13 | 14 | ### Benchmark result 15 | 16 | <img src=".benchmark/merge.png" width = "1500" height ="260" alt="Performance" /> 17 | 18 | ## How to use 19 | ### Dependencies: 20 | + python3.5+ 21 | + [paddlepaddle==1.8.5](https://github.com/PaddlePaddle/Paddle) 22 | + [parl<2.0.0](https://github.com/PaddlePaddle/PARL) 23 | + gym 24 | + mujoco-py>=1.50.1.0 25 | 26 | ### Start Training: 27 | ``` 28 | # To train an agent for the HalfCheetah-v2 game 29 | python train.py 30 | 31 | # To train on a different game 32 | # python train.py --env [ENV_NAME] 33 | ``` -------------------------------------------------------------------------------- /benchmark/fluid/offline-Q-learning/atari.py: -------------------------------------------------------------------------------- 1 | ../DQN_variant/atari.py -------------------------------------------------------------------------------- /benchmark/fluid/offline-Q-learning/atari_wrapper.py: -------------------------------------------------------------------------------- 1 | ../DQN_variant/atari_wrapper.py -------------------------------------------------------------------------------- /benchmark/fluid/offline-Q-learning/rom_files: -------------------------------------------------------------------------------- 1 | ../DQN_variant/rom_files -------------------------------------------------------------------------------- /benchmark/fluid/offline-Q-learning/utils.py: -------------------------------------------------------------------------------- 1 | ../DQN_variant/utils.py -------------------------------------------------------------------------------- /benchmark/torch/AlphaZero/.pic/good_moves.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/AlphaZero/.pic/good_moves.png -------------------------------------------------------------------------------- /benchmark/torch/AlphaZero/.pic/perfect_moves.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/AlphaZero/.pic/perfect_moves.png -------------------------------------------------------------------------------- /benchmark/torch/AlphaZero/gen_submission.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | import sys 16 | import base64 17 | import inspect 18 | import os 19 | 20 | assert len(sys.argv) == 2, "please specify model path." 21 | model_path = sys.argv[1] 22 | 23 | with open(model_path, 'rb') as f: 24 | raw_bytes = f.read() 25 | encoded_weights = base64.encodebytes(raw_bytes) 26 | 27 | # embed the encoded weights of the model into the generated script 28 | submission_file = """ 29 | import base64 30 | decoded = base64.b64decode({}) 31 | 32 | """.format(encoded_weights) 33 | 34 | # insert code snippet of loading weights 35 | with open('submission_template.py', 'r') as f: 36 | submission_file += ''.join(f.readlines()) 37 | 38 | # generate final submission file 39 | with open('submission.py', 'w') as f: 40 | f.write(submission_file) 41 | -------------------------------------------------------------------------------- /benchmark/torch/DT/README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | Based on PARL, we provide the implementation of the Decision Transformer, with the same performance as reported in the original paper. 3 | 4 | > Paper: [Decision Transformer: Reinforcement 5 | Learning via Sequence Modeling](https://arxiv.org/abs/2106.01345) 6 | 7 | ### Dataset for RL 8 | Follow the installation instructions in [D4RL](https://github.com/Farama-Foundation/D4RL) to install D4RL. 9 | Then run the script in the `data` directory to download the dataset for training. 10 | ```shell 11 | python download_d4rl_datasets.py 12 | ``` 13 | 14 | 15 | ### Benchmark result 16 | #### 1. Mujoco results 17 | <p align="center"> 18 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/DT/torch/mujoco_result.png" alt="mujoco-result"/> 19 | </p> 20 | 21 | + Each experiment was run three times with different random seeds 22 | 23 | ## How to use 24 | ### Dependencies: 25 | + [D4RL](https://github.com/Farama-Foundation/D4RL) 26 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 27 | + pytorch 28 | + gym==0.18.3 29 | + mujoco-py==2.0.2.13 30 | + transformers==4.5.1 31 | 32 | 33 | ### Training: 34 | 35 | ```shell 36 | # To train an agent for the `hopper` environment with the `medium` dataset 37 | python train.py --env hopper --dataset medium 38 | 39 | # To train an agent for the `hopper` environment with the `expert` dataset 40 | python train.py --env hopper --dataset expert 41 | ``` 42 | 43 | 44 | ### Reference 45 | 46 | https://github.com/kzl/decision-transformer 47 | -------------------------------------------------------------------------------- /benchmark/torch/ES/es.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import parl 16 | 17 | __all__ = ['ES'] 18 | 19 | 20 | class ES(parl.Algorithm): 21 | def __init__(self, model): 22 | """ES algorithm. 23 | 24 | Since the parameters of the model are updated at the numpy level, a `learn` function is not needed 25 | in this algorithm.
26 | 27 | Args: 28 | model (`parl.Model`): policy model of the ES algorithm. 29 | """ 30 | self.model = model 31 | 32 | def predict(self, obs): 33 | """Use the policy model to predict actions for observations. 34 | 35 | Args: 36 | obs (torch.Tensor): batch of observations. 37 | 38 | Returns: 39 | tensor of predicted actions. 40 | """ 41 | 42 | return self.model(obs) 43 | -------------------------------------------------------------------------------- /benchmark/torch/ES/es_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | config = { 16 | #========== remote config ========== 17 | 'master_address': 'localhost:8010', 18 | #========== env config ========== 19 | 'env_name': 'Humanoid-v4', 20 | #========== actor config ========== 21 | 'actor_num': 24, 22 | 'action_noise_std': 0.01, 23 | 'min_task_runtime': 0.2, 24 | 'eval_prob': 0.003, 25 | 'filter_update_prob': 0.01, 26 | 27 | #========== learner config ========== 28 | 'stepsize': 0.01, 29 | 'train_steps': 200, 30 | 'min_episodes_per_batch': 1000, 31 | 'min_steps_per_batch': 10000, 32 | 'noise_size': 200000000, 33 | 'noise_stdev': 0.02, 34 | 'l2_coeff': 0.005, 35 | 'report_window_size': 10, 36 | } 37 | -------------------------------------------------------------------------------- /benchmark/torch/ES/mujoco_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | import numpy as np 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | 20 | import parl 21 | 22 | 23 | class MujocoModel(parl.Model): 24 | def __init__(self, obs_dim, act_dim): 25 | super(MujocoModel, self).__init__() 26 | 27 | hid1_size = 256 28 | hid2_size = 256 29 | self.fc1 = nn.Linear(obs_dim, hid1_size) 30 | self.fc2 = nn.Linear(hid1_size, hid2_size) 31 | self.fc3 = nn.Linear(hid2_size, act_dim) 32 | 33 | def forward(self, obs): 34 | hid1 = torch.tanh(self.fc1(obs)) 35 | hid2 = torch.tanh(self.fc2(hid1)) 36 | means = self.fc3(hid2) 37 | return means 38 | -------------------------------------------------------------------------------- /benchmark/torch/ES/noise.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code is copied or modified from: 4 | # https://github.com/ray-project/ray/blob/master/python/ray/rllib/utils/filter.py 5 | 6 | import numpy as np 7 | 8 | 9 | class SharedNoiseTable(object): 10 | """Shared noise table used by the learner and actors. 11 | 12 | The learner and actors create the same noise table by passing the same seed. 13 | With the same noise table, the learner and actors can communicate noise by 14 | index into the table instead of passing numpy arrays of noise. 15 | """ 16 | 17 | def __init__(self, noise_size, seed=1024): 18 | self.noise_size = noise_size 19 | self.seed = seed 20 | self.noise = self._create_noise() 21 | 22 | def _create_noise(self): 23 | noise = np.random.RandomState(self.seed).randn(self.noise_size).astype( 24 | np.float32) 25 | return noise 26 | 27 | def get(self, i, dim): 28 | return self.noise[i:i + dim] 29 | 30 | def sample_index(self, dim): 31 | return np.random.randint(0, len(self.noise) - dim + 1) 32 | -------------------------------------------------------------------------------- /benchmark/torch/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/README.md: -------------------------------------------------------------------------------- 1 | ## NeurIPS2020 L2RPN Challenge 2 | 3 | The **PARL** team won first place in all tracks (both the Robustness Track and the Adaptability Track) of the *NeurIPS2020 Learning-to-Run-a-Power-Network* challenge! 4 | 5 | <p align="center"> 6 | <img src="../../../examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/images/l2rpn.jpeg" alt="PARL" height="300" /> 7 | </p> 8 | 9 | ## Paper Citation 10 | 11 | If you use our code for your experiments or find it helpful, please consider citing the following paper: 12 | 13 | <pre> 14 | @inproceedings{Zhou2021ActionSB, 15 | title={Action Set Based Policy Optimization for Safe Power Grid Management}, 16 | author={Bo Zhou and Hongsheng Zeng and Yuecheng Liu and Kejiao Li and Fan Wang and Hao Tian}, 17 | booktitle={ECML PKDD 2021}, 18 | year={2021} 19 | } 20 | </pre> 21 | -------------------------------------------------------------------------------- /benchmark/torch/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/track1/README.md: -------------------------------------------------------------------------------- 1 | ## Dependencies 2 | - python3.6 3 | - [parl==1.3.2](https://github.com/PaddlePaddle/PARL) 4 | - PyTorch==1.6.0 5 | - [grid2op==1.2.2](https://github.com/rte-france/Grid2Op) 6 | - [lightsim2grid==0.2.4](https://github.com/BDonnot/lightsim2grid) 7 | 8 | ## How to evaluate 9 | 1. Clone the repository. 10 | 2.
Download the saved models from an online storage service: [Baidu Pan](https://pan.baidu.com/s/14M1ccn72rgE_7X19e94bkQ) (password: `asiv`) or [Google Drive](https://drive.google.com/file/d/1mDE7K__QFHHxWCWIq53egtjPAVC0Jt0k/view?usp=sharing) 11 | 3. Unpack the file: 12 | ``` 13 | tar -xvzf saved_files.tar.gz 14 | ``` 15 | 4. Evaluate the result: 16 | ``` 17 | python evaluate.py --nb_episode=10 18 | ``` 19 | -------------------------------------------------------------------------------- /benchmark/torch/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/track2/README.md: -------------------------------------------------------------------------------- 1 | ## Dependencies 2 | - python3.6 3 | - [parl==1.3.2](https://github.com/PaddlePaddle/PARL) 4 | - PyTorch==1.6.0 5 | - [grid2op==1.2.2](https://github.com/rte-france/Grid2Op) 6 | - [lightsim2grid==0.2.4](https://github.com/BDonnot/lightsim2grid) 7 | 8 | ## How to evaluate 9 | 1. Clone the repository. 10 | 2. Download the saved models from an online storage service: [Baidu Pan](https://pan.baidu.com/s/1qpylN5QJA-h6EcaoUC1sgg) (password: `0r7v`) or [Google Drive](https://drive.google.com/file/d/1FuPz5bEeMSTM9QMR3cpbzH69TLMhklr4/view?usp=sharing) 11 | 3. Unpack the file: 12 | ``` 13 | tar -zxvf saved_files.tar.gz 14 | ``` 15 | 4. Evaluate the result: 16 | ``` 17 | python evaluate.py --nb_episode=10 18 | ``` 19 | -------------------------------------------------------------------------------- /benchmark/torch/QuickStart/README.md: -------------------------------------------------------------------------------- 1 | ## PyTorch benchmark Quick Start 2 | Train an agent with PARL to solve the CartPole problem, a classical benchmark in RL. 3 | 4 | ## How to use 5 | ### Dependencies: 6 | 7 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 8 | + torch 9 | + gym 10 | 11 | ### Start Training: 12 | ``` 13 | # Install dependencies 14 | pip install torch torchvision gym 15 | 16 | git clone https://github.com/PaddlePaddle/PARL.git 17 | cd PARL 18 | pip install . 19 | 20 | # Train model 21 | cd benchmark/torch/QuickStart 22 | python train.py 23 | ``` 24 | 25 | ### Expected Result 26 | <img src="https://github.com/PaddlePaddle/PARL/blob/develop/examples/QuickStart/performance.gif" width = "300" height ="200" alt="result"/> 27 | 28 | The agent can get around 200 points in a few minutes. 29 | -------------------------------------------------------------------------------- /benchmark/torch/QuickStart/cartpole_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | import parl 19 | 20 | 21 | class CartpoleModel(parl.Model): 22 | """ Fully connected network to solve the Cartpole problem. 23 | 24 | Args: 25 | obs_dim (int): Dimension of observation space. 26 | act_dim (int): Dimension of action space.
27 | """ 28 | 29 | def __init__(self, obs_dim, act_dim): 30 | super(CartpoleModel, self).__init__() 31 | hid1_size = act_dim * 10 32 | self.fc1 = nn.Linear(obs_dim, hid1_size) 33 | self.fc2 = nn.Linear(hid1_size, act_dim) 34 | 35 | def forward(self, x): 36 | out = torch.tanh(self.fc1(x)) 37 | prob = F.softmax(self.fc2(out), dim=-1) 38 | return prob 39 | -------------------------------------------------------------------------------- /benchmark/torch/a2c/.result/result_a2c_torch0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/a2c/.result/result_a2c_torch0.png -------------------------------------------------------------------------------- /benchmark/torch/a2c/.result/result_a2c_torch1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/a2c/.result/result_a2c_torch1.png -------------------------------------------------------------------------------- /benchmark/torch/a2c/a2c_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | config = { 16 | 17 | #========== remote config ========== 18 | 'master_address': 'localhost:8010', 19 | #========== env config ========== 20 | 'env_name': 'PongNoFrameskip-v4', 21 | 'env_dim': 84, 22 | 23 | #========== actor config ========== 24 | 'actor_num': 5, 25 | 'env_num': 5, 26 | 'sample_batch_steps': 20, 27 | 28 | #========== learner config ========== 29 | 'max_sample_steps': int(1e7), 30 | 'gamma': 0.99, 31 | 'lambda': 1.0, 32 | 33 | # start learning rate 34 | 'start_lr': 0.001, 35 | 36 | # coefficient of policy entropy adjustment schedule: (train_step, coefficient) 37 | 'entropy_coeff_scheduler': [(0, -0.01)], 38 | 'vf_loss_coeff': 0.5, 39 | 'log_metrics_interval_s': 10, 40 | 'learning_rate': 0.001, 41 | } 42 | -------------------------------------------------------------------------------- /benchmark/torch/coma/.benchmark/3m_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/coma/.benchmark/3m_result.png -------------------------------------------------------------------------------- /benchmark/torch/coma/starcraft2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:9.2-cudnn7-devel-ubuntu16.04 2 | MAINTAINER Tabish Rashid 3 | 4 | # CUDA includes 5 | ENV CUDA_PATH /usr/local/cuda 6 | ENV CUDA_INCLUDE_PATH /usr/local/cuda/include 7 | ENV CUDA_LIBRARY_PATH /usr/local/cuda/lib64 8 | 9 | # Ubuntu Packages 10 | RUN apt-get update -y && apt-get install software-properties-common -y && \ 11 | add-apt-repository -y multiverse && apt-get update -y && apt-get upgrade -y && \ 12 | apt-get install -y apt-utils nano vim git man build-essential wget sudo && \ 13 | rm -rf /var/lib/apt/lists/* 14 | 15 | # Install python3 pip3 16 | RUN apt-get update 17 | RUN apt-get -y install python3 18 | RUN apt-get -y install python3-pip 19 | RUN pip3 install --upgrade pip 20 | 21 | #### ------------------------------------------------------------------- 22 | #### install parl 23 | #### ------------------------------------------------------------------- 24 | RUN pip3 install parl 25 | 26 | #### ------------------------------------------------------------------- 27 | #### install SMAC 28 | #### ------------------------------------------------------------------- 29 | RUN pip3 install git+https://github.com/oxwhirl/smac.git 30 | 31 | #### ------------------------------------------------------------------- 32 | #### install pytorch 33 | #### ------------------------------------------------------------------- 34 | RUN pip3 install torch 35 | 36 | 37 | ENV SC2PATH /parl/starcraft2/StarCraftII 38 | WORKDIR /parl 39 | -------------------------------------------------------------------------------- /benchmark/torch/coma/starcraft2/build_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #### ------------------------------------------------------------------- 4 | #### build docker image 5 | #### ------------------------------------------------------------------- 6 | echo 'Building Dockerfile with image name parl-starcraft2:1.0' 7 | docker build -t parl-starcraft2:1.0 . 
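# After the image is built, a container can be started with a local StarCraft II
# installation mounted at the path the Dockerfile expects via SC2PATH
# (illustrative invocation; the host-side path is hypothetical):
#
#     docker run -it -v /path/to/StarCraftII:/parl/starcraft2/StarCraftII parl-starcraft2:1.0 /bin/bash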
8 | -------------------------------------------------------------------------------- /benchmark/torch/ddpg/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce DDPG with PARL 2 | Based on PARL, the DDPG algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks. 3 | 4 | > Paper: DDPG in [Continuous control with deep reinforcement learning](https://arxiv.org/abs/1509.02971) 5 | 6 | ### Mujoco games introduction 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download the Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco). 8 | 9 | ### Benchmark result 10 | 11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/DDPG/torch/result.png" width="600" alt="DDPG_results"/> 12 | 13 | ## How to use 14 | ### Dependencies: 15 | + python3.7+ 16 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 17 | + gym>=0.26.0 18 | + torch 19 | + mujoco>=2.2.2 20 | 21 | ### Start Training: 22 | ``` 23 | # To train an agent for the HalfCheetah-v4 game 24 | python train.py 25 | 26 | # To train on other games 27 | # python train.py --env [ENV_NAME] 28 | ``` -------------------------------------------------------------------------------- /benchmark/torch/dqn/.benchmark/dqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/dqn/.benchmark/dqn.png -------------------------------------------------------------------------------- /benchmark/torch/maml++/.benchmark/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/maml++/.benchmark/loss.png -------------------------------------------------------------------------------- /benchmark/torch/maml++/README.md: -------------------------------------------------------------------------------- 1 | # Regression MAML/MAML++ with PARL 2 | 3 | Implementation of [MAML](https://arxiv.org/abs/1703.03400) and [MAML++](https://arxiv.org/abs/1810.09502) with PyTorch and PARL that works for regression tasks. 4 | 5 | ## Benchmark result 6 | 7 | We follow the regression task setting from [Meta-SGD](https://arxiv.org/pdf/1707.09835.pdf), where the model learns different sine waves. We train and test the model with 5-shot tasks. The figure below shows the test losses of MAML and MAML++ on 10000 randomly generated sine waves.
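For reference, one task in this setting can be generated as follows. This is a minimal sketch assuming the sampling ranges used in the MAML paper (amplitude in [0.1, 5.0], phase in [0, pi], inputs in [-5.0, 5.0]); the function name is illustrative and not part of this repository:

```python
import numpy as np

def sample_sine_task(k_shot=5, k_query=5):
    """Sample one 5-shot sine-wave regression task."""
    amplitude = np.random.uniform(0.1, 5.0)
    phase = np.random.uniform(0.0, np.pi)
    # Support points drive the inner-loop adaptation;
    # query points are used to score the meta-update.
    x = np.random.uniform(-5.0, 5.0, size=(k_shot + k_query, 1)).astype(np.float32)
    y = (amplitude * np.sin(x + phase)).astype(np.float32)
    return (x[:k_shot], y[:k_shot]), (x[k_shot:], y[k_shot:])
```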
8 | 9 | <p align="center"> 10 | <img src=".benchmark/loss.png" alt="result"/> 11 | </p> 12 | 13 | | MAML (from Meta-SGD) | Meta-SGD (from Meta-SGD) | MAML (ours) | MAML++ (ours) | 14 | | --- | --- | --- | --- | 15 | | 1.13±0.18 | 0.90±0.16 | 0.93±0.02 | 0.34±0.01 | 16 | 17 | ## How to use 18 | 19 | ### Dependencies: 20 | 21 | + python>=3.7.0 22 | + pytorch==1.7.1 23 | + parl 24 | 25 | ### Start Training: 26 | 27 | ~~~ 28 | python3 train.py 29 | ~~~ 30 | -------------------------------------------------------------------------------- /benchmark/torch/oac/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce OAC with PARL 2 | Based on PARL, the OAC algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks. 3 | 4 | > Paper: OAC in [Better Exploration with Optimistic Actor-Critic](https://arxiv.org/abs/1910.12807) 5 | 6 | ### Mujoco games introduction 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download the Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco). 8 | 9 | ### Benchmark result 10 | 11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/OAC/torch/result.png" width="600" alt="OAC_results"/> 12 | 13 | + Each experiment was run three times with different seeds 14 | 15 | ## How to use 16 | ### Dependencies: 17 | + python3.7+ 18 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 19 | + gym>=0.26.0 20 | + torch 21 | + mujoco>=2.2.2 22 | 23 | ### Start Training: 24 | ``` 25 | # To train an agent for the HalfCheetah-v4 game 26 | python train.py 27 | 28 | # To train on other games with custom parameters 29 | python train.py --env [ENV_NAME] --alpha [float] --beta [float] --delta [float] 30 | ``` 31 | 32 | ### Reference 33 | + [microsoft/oac-explore](https://github.com/microsoft/oac-explore) 34 | -------------------------------------------------------------------------------- /benchmark/torch/qmix/README.md: -------------------------------------------------------------------------------- 1 | ## QMIX based on PyTorch 2 | We reproduce QMIX based on **PARL** and **PyTorch**, reaching the same level of indicators as the paper in StarCraftII benchmarks. 3 | ### QMIX 4 | QMIX is a **value-based** multi-agent reinforcement learning algorithm. 5 | See more information about QMIX in: [QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1803.11485) 6 | ### StarCraftII Environment 7 | Paper: [The StarCraft Multi-Agent Challenge](https://arxiv.org/pdf/1902.04043) 8 | GitHub repository: [smac](https://github.com/oxwhirl/smac) 9 | ## Benchmark Results 10 | <img src="images/torch-qmix-result.png" width = "1200" alt="Performance" /> 11 | 12 | - We trained our model in 5 different scenarios: *"3m", "8m", "2s_3z", "3s_5z"* and *"1c_3s_5z"*. 13 | - The **difficulty** in all scenarios is set to "7" (very difficult). 14 | - We trained the model 3 times for each scenario. 15 | 16 | ## How to Use 17 | ### Dependencies 18 | - python>=3.6 19 | - [parl](https://github.com/PaddlePaddle/PARL) 20 | - [smac](https://github.com/oxwhirl/smac) 21 | - PyTorch>=1.6.0 22 | 23 | ### Start Training 24 | 1. Modify the config in `qmix_config.py`. 25 | 2. Start training: 26 | ```bash 27 | python train.py 28 | ``` 29 | 3.
View the training process with TensorBoard: 30 | ```bash 31 | tensorboard --logdir ./ 32 | ``` 33 | -------------------------------------------------------------------------------- /benchmark/torch/qmix/images/torch-qmix-result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/benchmark/torch/qmix/images/torch-qmix-result.png -------------------------------------------------------------------------------- /benchmark/torch/qmix/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numpy as np 16 | 17 | 18 | class OneHotTransform(object): 19 | def __init__(self, out_dim): 20 | self.out_dim = out_dim 21 | 22 | def __call__(self, agent_id): 23 | assert agent_id < self.out_dim 24 | one_hot_id = np.zeros(self.out_dim, dtype='float32') 25 | one_hot_id[agent_id] = 1.0 26 | return one_hot_id 27 | -------------------------------------------------------------------------------- /benchmark/torch/sac/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce SAC with PARL 2 | Based on PARL, the SAC algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks. 3 | 4 | > Paper: SAC in [Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor](https://arxiv.org/abs/1801.01290) 5 | 6 | ### Mujoco games introduction 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download the Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco).
8 | 9 | ### Benchmark result 10 | 11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/SAC/torch/result.png" alt="SAC_results"/> 12 | 13 | + Each experiment was run three times with different seeds 14 | 15 | ## How to use 16 | ### Dependencies: 17 | + python3.7+ 18 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 19 | + gym>=0.26.0 20 | + torch 21 | + mujoco>=2.2.2 22 | 23 | ### Start Training: 24 | #### Train 25 | ``` 26 | # To train on HalfCheetah-v4 (default), Hopper-v4, Walker2d-v4, or Ant-v4 27 | # --alpha 0.2 (default) 28 | python train.py --env [ENV_NAME] 29 | 30 | # To reproduce the performance of Humanoid-v4 31 | python train.py --env Humanoid-v4 --alpha 0.05 32 | ``` -------------------------------------------------------------------------------- /benchmark/torch/td3/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce TD3 with PARL 2 | Based on PARL, the TD3 algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks. 3 | 4 | Includes the following approaches: 5 | + Clipped Double Q-learning 6 | + Target Networks and Delayed Policy Update 7 | + Target Policy Smoothing Regularization 8 | 9 | > TD3 in 10 | [Addressing Function Approximation Error in Actor-Critic Methods](https://arxiv.org/abs/1802.09477) 11 | 12 | ### Mujoco games introduction 13 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download the Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco). 14 | 15 | ### Benchmark result 16 | 17 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/TD3/torch/result.png" alt="Performance" /> 18 | 19 | ## How to use 20 | ### Dependencies: 21 | + python3.7+ 22 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 23 | + gym>=0.26.0 24 | + torch 25 | + mujoco>=2.2.2 26 | 27 | ### Start Training: 28 | ``` 29 | # To train an agent for the HalfCheetah-v4 game 30 | python train.py 31 | 32 | # To train on a different game 33 | # python train.py --env [ENV_NAME] 34 | ``` -------------------------------------------------------------------------------- /docs/EvoKit/overview.rst: -------------------------------------------------------------------------------- 1 | Overview 2 | ------------------ 3 | 4 | ``EvoKit`` is an evolutionary-algorithm library that integrates multiple evolutionary algorithms and is compatible with a variety of inference frameworks, focusing on **fast online deployment and validation**. 5 | 6 | .. image:: ../../evo_kit/DeepES.gif 7 | :align: center 8 | :width: 400px 9 | 10 | Features 11 | ######### 12 | 13 | **1. Support for multiple evolutionary algorithms.** Supports Gaussian sampling, CMA, GA and other algorithms, with more being added continuously. 14 | 15 | **2. Support for mainstream optimizers.** Supports SGD/Momentum/Adam and other mainstream optimizers, effectively improving convergence efficiency. 16 | 17 | **3. One-stop deployment.** Integrates the online sampling and offline update workflows, and provides Bcloud/CMake build options to help get online quickly. 18 | 19 | **4. Compatible with the whole range of deep learning frameworks.** EvoKit supports hand-written networks as well as deep learning frameworks such as Paddle, Lego, and Torch. 20 | 21 | **5. Synchronous/asynchronous updates.** Supports asynchronous updates with multiple sampling models and multiple batches of sampled data, fitting real production scenarios. 22 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help".
11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | -------------------------------------------------------------------------------- /docs/api_docs/index.rst: -------------------------------------------------------------------------------- 1 | .. PARL_docs documentation master file, created by 2 | sphinx-quickstart on Mon Apr 22 11:12:25 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | PARL Documentation 7 | ===================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | utils 13 | -------------------------------------------------------------------------------- /docs/api_docs/utils.rst: -------------------------------------------------------------------------------- 1 | parl.Model 2 | -------------------- 3 | .. automodule:: parl.framework.model_base 4 | :members: 5 | :undoc-members: 6 | :show-inheritance: 7 | -------------------------------------------------------------------------------- /docs/apis/agent.rst: -------------------------------------------------------------------------------- 1 | parl.Agent 2 | ---------- 3 | .. autoclass:: parl.core.paddle.agent.Agent 4 | :members: 5 | 6 | -------------------------------------------------------------------------------- /docs/apis/algorithm.rst: -------------------------------------------------------------------------------- 1 | parl.Algorithm 2 | -------------- 3 | .. autoclass:: parl.core.paddle.algorithm.Algorithm 4 | :members: 5 | 6 | -------------------------------------------------------------------------------- /docs/apis/connect.rst: -------------------------------------------------------------------------------- 1 | parl.connect 2 | ------------ 3 | .. autofunction:: parl.remote.client.connect 4 | 5 | 6 | -------------------------------------------------------------------------------- /docs/apis/model.rst: -------------------------------------------------------------------------------- 1 | parl.Model 2 | ---------- 3 | .. autoclass:: parl.core.paddle.model.Model 4 | :members: 5 | 6 | -------------------------------------------------------------------------------- /docs/apis/remote_class.rst: -------------------------------------------------------------------------------- 1 | parl.remote_class 2 | ----------------- 3 | .. autofunction:: parl.remote.remote_decorator.remote_class 4 | 5 | 6 | -------------------------------------------------------------------------------- /docs/basic_structure/agent.rst: -------------------------------------------------------------------------------- 1 | Agent (*Generate Data Flow*) 2 | =============================== 3 | 4 | Methods 5 | -------- 6 | 1. __init__(self, algorithm, gpu_id=None) 7 | 8 | Call build_program here and run initialization for default_startup_program. 9 | 10 | 2. build_program(self) 11 | 12 | Use define_predict and define_learn in Algorithm to build the training program and the prediction program. This will be called 13 | by the __init__ method of the Agent class. 14 | 15 | 3. predict(self, obs) 16 | 17 | Predict the action given the current observation of the environment. Note that this function only performs the prediction and does not attempt any exploration.
18 | To explore the action space, you should implement your exploration strategy in the `sample` function below. 19 | Basically, this function is often used in the test process. 20 | 21 | 4. sample(self, obs) 22 | 23 | Predict the action given the current observation of the environment. 24 | Additionally, noise is added to the action here to explore new trajectories. 25 | Basically, this function is often used in the training process. 26 | 27 | 5. learn(self, obs, action, reward, next_obs, terminal) 28 | 29 | Pass data to the training program to update the model. This method is the training interface for Agent. 30 | -------------------------------------------------------------------------------- /docs/basic_structure/algorithm.rst: -------------------------------------------------------------------------------- 1 | Algorithm (*Backward Part*) 2 | ============================= 3 | 4 | Methods 5 | --------- 6 | 1. define_predict(self, obs) 7 | 8 | Use the policy() method of the Model to predict the probabilities of actions. 9 | 10 | 2. define_learn(self, obs, action, reward, next_obs, terminal) 11 | 12 | Define the loss function and optimizer here to update the policy model. 13 | 14 | An Example 15 | ----------- 16 | 17 | 18 | 19 | .. code-block:: python 20 | :linenos: 21 | 22 | # From https://github.com/PaddlePaddle/PARL/blob/develop/parl/algorithms/policy_gradient.py 23 | 24 | class PolicyGradient(Algorithm): 25 | def __init__(self, model, hyperparas): 26 | Algorithm.__init__(self, model, hyperparas) 27 | self.model = model 28 | self.lr = hyperparas['lr'] 29 | 30 | def define_predict(self, obs): 31 | """ use policy model self.model to predict the action probability 32 | """ 33 | return self.model.policy(obs) 34 | 35 | def define_learn(self, obs, action, reward): 36 | """ update policy model self.model with policy gradient algorithm 37 | """ 38 | act_prob = self.model.policy(obs) 39 | log_prob = layers.cross_entropy(act_prob, action) 40 | cost = log_prob * reward 41 | cost = layers.reduce_mean(cost) 42 | optimizer = fluid.optimizer.Adam(self.lr) 43 | optimizer.minimize(cost) 44 | return cost 45 | -------------------------------------------------------------------------------- /docs/basic_structure/model.rst: -------------------------------------------------------------------------------- 1 | Model (*Forward Part*) 2 | ======================= 3 | A Model is owned by an Algorithm. Model is responsible for the entire network model (**forward part**) for the specific problems. 4 | 5 | 6 | Methods 7 | ---------- 8 | 1. policy(self, obs) 9 | 10 | Define the network structure here. The Algorithm will call this method to predict probabilities of actions. 11 | It is optional. 12 | 13 | 2. value(self, obs) 14 | 15 | Return: values: a dict of estimated values for the current observations and states. 16 | For example, "q_value" and "v_value". 17 | 18 | 3. sync_params_to(self, target_net, gpu_id, decay=0.0, share_vars_parallel_executor=None) 19 | 20 | This method deep-copies the parameters from the current network to the target network; the two networks must have the same structure. 21 | 22 | An example 23 | ------------ 24 | ..
code-block:: python 25 | :linenos: 26 | 27 | class MLPModel(Model): 28 | def __init__(self): 29 | self.fc = layers.fc(size=64) 30 | 31 | def policy(self, obs): 32 | out = self.fc(obs) 33 | return out 34 | 35 | model = MLPModel() 36 | target_model = deepcopy(model) # automatically creates new unique parameter names for target_model.fc 37 | 38 | # build program 39 | x = layers.data(name='x', shape=[100], dtype="float32") 40 | y1 = model.policy(x) 41 | y2 = target_model.policy(x) 42 | -------------------------------------------------------------------------------- /docs/basic_structure/overview.rst: -------------------------------------------------------------------------------- 1 | Overview 2 | ========== 3 | Three Components 4 | ------------------ 5 | PARL is made up of three components: **Model, Algorithm, Agent**. They are constructed layer by layer to build the main body. 6 | 7 | Model 8 | --------- 9 | A Model is owned by an Algorithm. Model is responsible for the entire network model (**forward part**) for the specific problems. 10 | 11 | Algorithm 12 | ---------- 13 | Algorithm defines the way to update the parameters in the Model (**backward part**). We have already implemented some commonly used algorithms__, such as DQN/DDPG/PPO/A3C, which you can directly import and use. 14 | 15 | .. __: https://github.com/PaddlePaddle/PARL/tree/develop/parl/algorithms 16 | 17 | Agent 18 | -------- 19 | An Agent interacts with the environment and **generates the data flow** outside the Algorithm. -------------------------------------------------------------------------------- /docs/features.rst: -------------------------------------------------------------------------------- 1 | Features 2 | =========== 3 | 4 | **1. Reproducible** 5 | 6 | | We provide algorithms that stably reproduce the results of many influential reinforcement learning algorithms. 7 | 8 | **2. Large Scale** 9 | 10 | | Ability to support high-performance parallelization of training with thousands of CPUs and multiple GPUs. 11 | 12 | **3. Reusable** 13 | 14 | | Algorithms provided in the repository can be directly adapted to new tasks by defining a forward network; the training mechanism will be built automatically. 15 | 16 | **4. Extensible** 17 | 18 | | Build new algorithms quickly by inheriting the abstract classes in the framework.
19 | -------------------------------------------------------------------------------- /docs/images/PARL-logo-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/images/PARL-logo-1.png -------------------------------------------------------------------------------- /docs/images/PARL-logo-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/images/PARL-logo-2.png -------------------------------------------------------------------------------- /docs/images/bar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/images/bar.png -------------------------------------------------------------------------------- /docs/images/quickstart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/images/quickstart.png -------------------------------------------------------------------------------- /docs/implementations/a2c.rst: -------------------------------------------------------------------------------- 1 | A2C 2 | ========== 3 | 4 | .. automodule:: parl.algorithms.paddle.a2c 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/implementations/a3c.rst: -------------------------------------------------------------------------------- 1 | A3C 2 | ========== 3 | 4 | .. automodule:: parl.algorithms.paddle.a3c 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/implementations/ddpg.rst: -------------------------------------------------------------------------------- 1 | DDPG 2 | ========== 3 | 4 | .. automodule:: parl.algorithms.paddle.ddpg 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/implementations/ddqn.rst: -------------------------------------------------------------------------------- 1 | DDQN 2 | ========== 3 | 4 | .. automodule:: parl.algorithms.paddle.ddqn 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/implementations/dqn.rst: -------------------------------------------------------------------------------- 1 | DQN 2 | ========== 3 | 4 | .. automodule:: parl.algorithms.paddle.dqn 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/implementations/impala.rst: -------------------------------------------------------------------------------- 1 | IMPALA 2 | ========== 3 | 4 | .. automodule:: parl.algorithms.fluid.impala.impala 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/implementations/maddpg.rst: -------------------------------------------------------------------------------- 1 | MADDPG 2 | ========== 3 | 4 | .. 
automodule:: parl.algorithms.paddle.maddpg 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 |
-------------------------------------------------------------------------------- /docs/implementations/oac.rst: -------------------------------------------------------------------------------- 1 | OAC 2 | ========== 3 | 4 | .. automodule:: parl.algorithms.paddle.oac 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 |
-------------------------------------------------------------------------------- /docs/implementations/pg.rst: -------------------------------------------------------------------------------- 1 | Policy Gradient 2 | ================== 3 | 4 | .. automodule:: parl.algorithms.paddle.policy_gradient 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 |
-------------------------------------------------------------------------------- /docs/implementations/ppo.rst: -------------------------------------------------------------------------------- 1 | PPO 2 | ========== 3 | 4 | .. automodule:: parl.algorithms.paddle.ppo 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 |
-------------------------------------------------------------------------------- /docs/implementations/qmix.rst: -------------------------------------------------------------------------------- 1 | QMIX 2 | ========== 3 | 4 | .. automodule:: parl.algorithms.paddle.qmix 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 |
-------------------------------------------------------------------------------- /docs/implementations/sac.rst: -------------------------------------------------------------------------------- 1 | SAC 2 | ========== 3 | 4 | .. automodule:: parl.algorithms.paddle.sac 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 |
-------------------------------------------------------------------------------- /docs/implementations/td3.rst: -------------------------------------------------------------------------------- 1 | TD3 2 | ========== 3 | 4 | .. automodule:: parl.algorithms.paddle.td3 5 | :members: 6 | :no-undoc-members: 7 | :show-inheritance: 8 |
-------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============= 3 | Dependencies 4 | ------------------- 5 | - Python 3.5+ (Python 3.8+ is preferable for distributed training). 6 | - `paddlepaddle>=2.0 <https://github.com/PaddlePaddle/Paddle>`_ (**Optional**, not required if you only want to use the parallelization APIs) 7 | 8 | Install 9 | ------------- 10 | PARL is distributed on PyPI and can be installed with pip: 11 | 12 | .. code-block:: 13 | 14 | pip install parl 15 | 16 | or install from source: 17 | 18 | .. code-block:: 19 | 20 | pip install --upgrade git+https://github.com/PaddlePaddle/PARL.git 21 |
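A quick way to verify the installation (a minimal check; recent PARL releases expose ``__version__``, and a bare ``import parl`` is a sufficient check otherwise):

.. code-block:: python

    import parl
    print(parl.__version__)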
-------------------------------------------------------------------------------- /docs/installation_guide.md: -------------------------------------------------------------------------------- 1 | # Installation Guide 2 | 3 | ## Detailed Installation Steps 4 | 5 | 1. **Environment Preparation** 6 | - Supported Python versions: 3.7 - 3.10 (tested on Linux systems). 7 | 8 | 2. **Install the DL Framework** 9 | - For the CPU version, simply run the following command: 10 | ```bash 11 | pip install paddlepaddle 12 | ``` 13 | - For the GPU version: 14 | - On Linux, the maximum supported version is 2.5: 15 | ```bash 16 | pip install paddlepaddle-gpu==2.5 17 | ``` 18 | - On Windows, the maximum supported version is 2.2.1: 19 | ```bash 20 | pip install paddlepaddle-gpu==2.2.1 21 | ``` 22 | 23 | 3. **Adjust Numpy Version** 24 | - After installing PaddlePaddle, if the current numpy version is higher than 1.23.5, reinstall numpy: 25 | ```bash 26 | pip install numpy==1.23.5 27 | ``` 28 | 29 | 4. **Install PARL and Gym** 30 | - Run the following command to install the latest versions of PARL and Gym: 31 | ```bash 32 | pip install parl gym 33 | ``` 34 | 35 | 5. **Test the Installation** 36 | - Use the following command to run the quick-start test script: 37 | ```bash 38 | python examples/QuickStart/train.py 39 | ``` 40 | 41 | --- 42 |
-------------------------------------------------------------------------------- /docs/installation_guide_cn.md: -------------------------------------------------------------------------------- 1 | # Installation Guide 2 | 3 | ## Detailed Installation Steps 4 | 5 | 1. **Environment Preparation** 6 | - Tested Python versions: 3.7 - 3.10 (on Linux systems) 7 | 8 | 2. **Install the DL Framework** 9 | - For the CPU version, simply run the following command: 10 | ```bash 11 | pip install paddlepaddle 12 | ``` 13 | - For the GPU version: 14 | - On Linux, the maximum supported version is 2.5: 15 | ```bash 16 | pip install paddlepaddle-gpu==2.5 17 | ``` 18 | - On Windows, the maximum supported version is 2.2.1: 19 | ```bash 20 | pip install paddlepaddle-gpu==2.2.1 21 | ``` 22 | 23 | 3. **Adjust the Numpy Version** 24 | - If the current numpy version is higher than 1.23.5 after installing paddle, reinstall it: 25 | ```bash 26 | pip install numpy==1.23.5 27 | ``` 28 | 29 | 4. **Install PARL and Gym** 30 | - Run the following command to install the latest versions of PARL and Gym: 31 | ```bash 32 | pip install parl gym 33 | ``` 34 | 35 | 5. **Test the Installation** 36 | - Use the following command to run the quick-start test script: 37 | ```bash 38 | python examples/QuickStart/train.py 39 | ``` 40 | 41 | ---
-------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/api_docs.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2019, nlp-ol@baidu.com 3 | # This file is distributed under the same license as the PARL package. 4 | # FIRST AUTHOR <EMAIL@ADDRESS>, 2021. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PARL \n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2021-05-18 14:23+0800\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" 14 | "Language-Team: LANGUAGE <LL@li.org>\n" 15 | "MIME-Version: 1.0\n" 16 | "Content-Type: text/plain; charset=utf-8\n" 17 | "Content-Transfer-Encoding: 8bit\n" 18 | "Generated-By: Babel 2.9.1\n" 19 | 20 | #: ../../api_docs/index.rst:7 21 | msgid "PARL Documentation" 22 | msgstr "PARL 文档" 23 | 24 | #: ../../api_docs/utils.rst:2 25 | msgid "parl.Model" 26 | msgstr "" 27 | 28 |
-------------------------------------------------------------------------------- /docs/overview/abstractions.rst: -------------------------------------------------------------------------------- 1 | Abstractions 2 | ---------------- 3 | .. image:: ../../.github/abstractions.png 4 | :align: center 5 | :width: 400px 6 | 7 | | PARL aims to build an **agent** that can be trained with algorithms to perform complex tasks.
8 | | The main abstractions introduced by PARL that are used to build an agent recursively are the following: 9 | 10 | * ``Model`` is abstracted to construct the forward network, which defines a policy network or critic network given state as input. 11 | 12 | * ``Algorithm`` describes the mechanism to update parameters in the *model* and often contains at least one model. 13 | 14 | * ``Agent``, a data bridge between the *environment* and the *algorithm*, is responsible for data I/O with the outside environment and describes data preprocessing before feeding data into the training process. 15 | 16 | Note: For more information about base classes, please visit our :doc:`tutorial <../tutorial/getting_started>` and :doc:`API document <../apis/model>`.
-------------------------------------------------------------------------------- /docs/overview/features.rst: -------------------------------------------------------------------------------- 1 | Features 2 | ---------------- 3 | 4 | **1. Reproducible** 5 | 6 | | We provide algorithms that stably reproduce the results of many influential reinforcement learning algorithms. 7 | 8 | **2. Large Scale** 9 | 10 | | Ability to support high-performance parallelized training with thousands of CPUs and multiple GPUs. 11 | 12 | **3. Reusable** 13 | 14 | | Algorithms provided in the repository can be directly adapted to new tasks by defining a forward network; the training mechanism will be built automatically. 15 | 16 | **4. Extensible** 17 | 18 | | Build new algorithms quickly by inheriting the abstract class in the framework. 19 |
-------------------------------------------------------------------------------- /docs/parallel_training/comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/parallel_training/comparison.png
-------------------------------------------------------------------------------- /docs/parallel_training/elapsed_time.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/parallel_training/elapsed_time.jpg
-------------------------------------------------------------------------------- /docs/parallel_training/file_distribution.rst: -------------------------------------------------------------------------------- 1 | File Distribution 2 | ================== 3 | 4 | File distribution is an important function of distributed parallel computing. It is responsible for distributing the user's code 5 | and configuration files to different machines, so that all machines perform parallel computing using the same code. By default, all ``.py`` files that are located in the same directory 6 | as the main file submitted to XPARL (such as ``main.py``) will be distributed. But sometimes users need to distribute some specific files, such as model files, configuration files, and Python code in subdirectories (submodules for import). 7 | In order to meet this demand, ``parl.connect`` provides an interface where users can directly specify the files or code that need to be distributed. 8 | 9 | Example: 10 | ################ 11 | 12 | The file directory structure is as follows; we want to distribute the ``.py`` files in the policy folder. We can pass the files that we want to distribute to the ``distributed_files`` parameter when calling ``connect``; this parameter also supports regular expressions.
13 | 14 | .. code-block:: 15 | 16 | . 17 | ├── main.py 18 | └── policy 19 | ├── agent.py 20 | ├── config.ini 21 | └── __init__.py 22 | 23 | .. code-block:: python 24 | 25 | parl.connect("localhost:8004", distributed_files=['./policy/*.py', './policy/*.ini']) 26 |
-------------------------------------------------------------------------------- /docs/parallel_training/poster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/parallel_training/poster.png
-------------------------------------------------------------------------------- /docs/questions/distributed_training.rst: -------------------------------------------------------------------------------- 1 | xparl questions 2 | ================================ 3 | Recommended data types in xparl 4 | ############################### 5 | Python is an interpreted language, unlike C++, which compiles code before running. Thus xparl lacks prior knowledge of the data to be transmitted to a remote class instance, making it difficult to convert user-defined data into a binary data stream. Therefore, it is advisable for users to utilize native Python data types such as Int, Dict, and Numpy Array when passing arguments to or retrieving results from a remote class instance.
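For example (a minimal sketch; the ``Worker`` class, its method name, and the master address are illustrative only), sticking to native types keeps the data transmission straightforward:

.. code-block:: python

    import numpy as np
    import parl

    @parl.remote_class
    class Worker(object):
        def rollout(self, config):
            # config is a native Dict; the return value mixes native types
            obs = np.zeros((config["obs_dim"],), dtype=np.float32)
            return {"obs": obs, "reward": 0.0}

    parl.connect("localhost:8004")  # assumes a started xparl master at this address
    worker = Worker()
    result = worker.rollout({"obs_dim": 4, "seed": 0})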
6 | 7 | RL questions 8 | ================================ 9 | 10 |
-------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | paddlepaddle==2.4.2 2 |
-------------------------------------------------------------------------------- /docs/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd "$(dirname "$0")" 3 | source ~/.bashrc 4 | export PATH="/root/miniconda3/bin:$PATH" 5 | source deactivate 6 | source activate docs 7 | pip install /work/ 8 | make html 9 |
-------------------------------------------------------------------------------- /docs/tutorial/add_histogram.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/tutorial/add_histogram.jpg
-------------------------------------------------------------------------------- /docs/tutorial/add_scalar.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/tutorial/add_scalar.jpg
-------------------------------------------------------------------------------- /docs/tutorial/output_as_csv.rst: -------------------------------------------------------------------------------- 1 | CSV Logger 2 | ========== 3 | 4 | PARL provides a tool to output the indicators during the training process to a CSV table. The tool can be imported using: 5 | 6 | .. code-block:: python 7 | 8 | from parl.utils import CSVLogger 9 | 10 | How to Use 11 | ------------- 12 | 13 | 1. Specify the path for saving the CSV file and initialize ``CSVLogger``: 14 | 15 | .. code-block:: python 16 | 17 | csv_logger = CSVLogger("result.csv") 18 | 19 | 2. Output a dictionary that contains the indicators: 20 | 21 | `Parameters`: 22 | 23 | * result(dict) - indicators to be written to the CSV file 24 | 25 | `Method`: 26 | 27 | .. code-block:: python 28 | 29 | csv_logger.log_dict({"loss": 1, "reward": 2}) 30 | 31 | Example 32 | ------------- 33 | 34 | .. code-block:: python 35 | 36 | from parl.utils import CSVLogger 37 | 38 | csv_logger = CSVLogger("result.csv") 39 | csv_logger.log_dict({"loss": 1, "reward": 2}) 40 | csv_logger.log_dict({"loss": 3, "reward": 4}) 41 | 42 | The CSV file will contain: 43 | 44 | .. code-block:: 45 | 46 | loss,reward 47 | 1,2 48 | 3,4 49 |
-------------------------------------------------------------------------------- /docs/tutorial/save_param.rst: -------------------------------------------------------------------------------- 1 | Save and Restore Parameters 2 | ============================= 3 | 4 | Goal of this tutorial: 5 | 6 | - Learn how to save and restore parameters. 7 | 8 | **Scene 1:** 9 | 10 | Sometimes we need to save the parameters into a file and reuse them later on. PARL provides operators 11 | to save parameters to a file and restore parameters from a file easily. You only need several lines of code to implement this. 12 | 13 | Here is a demonstration of usage: 14 | 15 | .. code-block:: python 16 | 17 | agent = AtariAgent() 18 | # save the parameters of agent to ./model_dir 19 | agent.save('./model_dir') 20 | # restore the parameters from ./model_dir to agent 21 | agent.restore('./model_dir') 22 | 23 | **Scene 2:** 24 | 25 | Sometimes during the training procedure, we want to sync the latest model parameters to Agents (Actors) on other servers. To do this, we first fetch the parameters into memory and then 26 | set the parameters of the Agents (Actors) on the other servers. 27 | 28 | .. code-block:: python 29 | 30 | #--------------Agent--------------- 31 | weights = agent.get_weights() 32 | #--------------Remote Actor-------------- 33 | actor.set_weights(weights)
-------------------------------------------------------------------------------- /docs/zh_CN/tutorial/csv_logger.md: -------------------------------------------------------------------------------- 1 | # **Tutorial: Saving Experiment Metrics as CSV** 2 | PARL provides a tool for writing metrics from the training process to a CSV table. Import the tool with: 3 | 4 | `from parl.utils import CSVLogger` 5 | 6 | 7 | ### Usage 8 | 1. Pass in the save path of the CSV file and initialize the CSVLogger 9 | 10 | `csv_logger = CSVLogger("result.csv")` 11 |
12 | 2. Output the metrics recorded as a dictionary 13 | 14 | Parameters 15 | - result (dict) – dictionary of metrics to be written to the CSV file 16 | 17 | Method 18 | 19 | `csv_logger.log_dict({"loss": 1, "reward": 2})` 20 | 21 | ### Complete Example 22 | ```python 23 | from parl.utils import CSVLogger 24 | 25 | csv_logger = CSVLogger("result.csv") 26 | csv_logger.log_dict({"loss": 1, "reward": 2}) 27 | csv_logger.log_dict({"loss": 3, "reward": 4}) 28 | 29 | ``` 30 | #### Expected Result 31 | 32 | The content of result.csv is as follows: 33 | 34 | ``` 35 | loss,reward 36 | 1,2 37 | 3,4 38 | ``` 39 |
-------------------------------------------------------------------------------- /docs/zh_CN/tutorial/param.md: -------------------------------------------------------------------------------- 1 | # **Tutorial: Model Parameter Management** 2 | Scene 1: During training, we sometimes need to save the trained model parameters locally for later deployment or evaluation. 3 | 4 | After the user has built the agent, parameters can be saved directly through the agent's interfaces. 5 | ```python 6 | agent = AtariAgent() 7 | # save parameters to ./model_dir 8 | agent.save('./model_dir') 9 | # restore parameters to this agent 10 | agent.restore('./model_dir') 11 | ``` 12 | 13 | Scene 2: During parallel training, we often need to sync the latest model parameters to another server. To do this, fetch the model parameters into memory first, and then assign them to the agent (actor) on the other machine. 14 | 15 | ```python 16 | #--------------Agent--------------- 17 | weights = agent.get_weights() 18 | #--------------Remote Actor-------------- 19 | actor.set_weights(weights) 20 | ``` 21 | 22 | Scene 3: After training, we need to save the trained model structure and parameters locally for later inference deployment. 23 | 24 | The network structure and parameters can be saved directly through the agent's interface. 25 | 26 | ```python 27 | # save the network structure and parameters to ./inference_model_dir 28 | agent.save_inference_model('./inference_model_dir', [[None, 128]], ['float32']) 29 | ``` 30 | 31 | For Actor-Critic algorithms, only the actor network needs to be saved. 32 | 33 | ```python 34 | # save the policy network structure and parameters of the Actor-Critic algorithm to ./inference_ac_model_dir 35 | agent.save_inference_model('./inference_ac_model_dir', [[None, 128]], ['float32'], agent.alg.model.actor_model) 36 | ``` 37 |
-------------------------------------------------------------------------------- /docs/zh_CN/tutorial/summary.md: -------------------------------------------------------------------------------- 1 | # **Tutorial: Plotting Training Curves** 2 | PARL integrates tensorboardX as its visualization tool. Import the tool with: 3 | 4 | `from parl.utils import summary`. 5 | 6 | 7 | ### Line chart API: add_scalar 8 | summary.add_scalar(tag, scalar_value, global_step=None) 9 | 10 | Common parameters 11 | - tag (string) – Data identifier 12 | - scalar_value (float or string/blobname) – Value to save 13 | - global_step (int) – Global step value to record 14 | 15 | Example: 16 | ```python 17 | from parl.utils import summary 18 | 19 | x = range(100) 20 | for i in x: 21 | summary.add_scalar('y=2x', i * 2, i) 22 | ``` 23 | Expected result: 24 | 25 | <img src="../../tutorial/add_scalar.jpg" width="300"/> 26 | 27 | ### Histogram API: add_histogram 28 | 29 | summary.add_histogram(tag, values, global_step=None) 30 | 31 | Common parameters 32 | - tag (string) – Data identifier 33 | - values (np.ndarray) – histogram to be saved 34 | - global_step (int) – Global step value to record 35 | 36 | Example: 37 | ```python 38 | from parl.utils import summary 39 | import numpy as np 40 | 41 | for i in range(10): 42 | x = np.random.random(1000) 43 | summary.add_histogram('distribution centers', x + i, i) 44 | ``` 45 | 46 | Expected result: 47 | 48 | <img src="../../tutorial/add_histogram.jpg" width="300"/> 49 | 50 | ### Changing the data save path 51 | The default save path is `./train_log`. The summary data path is bound to the logger, so simply change the logger's save path: 52 | ```python 53 | from parl.utils import logger 54 | logger.set_dir('./train_log/exp1') 55 | ``` 56 |
-------------------------------------------------------------------------------- /docs/zh_CN/xparl/.images/dataset1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/zh_CN/xparl/.images/dataset1.png
-------------------------------------------------------------------------------- /docs/zh_CN/xparl/.images/dataset2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/zh_CN/xparl/.images/dataset2.png
-------------------------------------------------------------------------------- /docs/zh_CN/xparl/.images/dataset3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/zh_CN/xparl/.images/dataset3.png
-------------------------------------------------------------------------------- /docs/zh_CN/xparl/.images/dataset4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/zh_CN/xparl/.images/dataset4.png
-------------------------------------------------------------------------------- /docs/zh_CN/xparl/.images/log_server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/docs/zh_CN/xparl/.images/log_server.png
-------------------------------------------------------------------------------- /docs/zh_CN/xparl/debug.md: -------------------------------------------------------------------------------- 1 | # **How to debug in xparl** 2 | 3 | A class decorated with the parallel decorator does not run locally; it runs on the cluster. Accordingly, we cannot see the printed logs on the local machine, as in the earlier code. 4 | ```python 5 | import parl 6 | 7 | @parl.remote_class 8 | class Actor(object): 9 | def hello_world(self): 10 | print("Hello world.") 11 | 12 | def add(self, a, b): 13 | return a + b 14 | 15 | # connect to the cluster (master node) 16 | parl.connect("localhost:6006") 17 | 18 | actor = Actor() 19 | actor.hello_world()  # the computation runs on the cluster, so nothing is printed here 20 | ``` 21 | 22 | In this situation, how should we debug and locate the problem? 23 | Two approaches are recommended: 24 | 25 | - Comment out the parallel decorator (see the sketch at the end of this page) 26 | First run the code locally instead of on the cluster, debug with the output logs, and add the decorator back once debugging passes. However, with static-graph deep learning frameworks this may trigger duplicate definitions of the static graph, so this approach is not recommended when using paddle or tensorflow. 27 | 28 | - Use xparl's log service 29 | After the local script connects to the xparl cluster, xparl prints the address of the log server. Open this address in a browser to view the output of each parallel task in real time. 30 | 31 | <img src="./.images/log_server.png" width="500"/> 32 |
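A minimal sketch of the first approach, using the same `Actor` class as above (remember to restore the decorator before running on the cluster again):

```python
import parl

# the decorator is commented out temporarily so that the class runs locally
# @parl.remote_class
class Actor(object):
    def hello_world(self):
        print("Hello world.")

    def add(self, a, b):
        return a + b

actor = Actor()
actor.hello_world()  # runs locally, so "Hello world." is printed to the console
```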
-------------------------------------------------------------------------------- /docs/zh_CN/xparl/distribute_files.md: -------------------------------------------------------------------------------- 1 | # **How to distribute local files in xparl** 2 | 3 | File distribution is an important function of distributed parallel computing. It distributes the user's code and configuration files to different machines, so that all machines run the same code for parallel computation. By default, XPARL distributes all files ending in .py in the directory containing the main file. But sometimes users need to distribute specific files, such as model files, configuration files, and Python code in subdirectories (submodules for import). To meet this demand, parl.connect provides an interface where users can directly specify the files or code to be distributed. 4 | 5 | ### Example 6 | 7 | The file directory structure is as follows; we want to distribute the .py files in the policy folder. 8 | We can pass the files that we want to distribute to the `distributed_files` parameter when connecting; this parameter also supports regular expressions. 9 | 10 | ``` 11 | . 12 | ├── main.py 13 | └── policy 14 | ├── agent.py 15 | ├── config.ini 16 | └── __init__.py 17 | ``` 18 | 19 | ```python 20 | parl.connect("localhost:8004", distributed_files=['./policy/*.py', './policy/*.ini']) 21 | ``` 22 |
-------------------------------------------------------------------------------- /docs/zh_CN/xparl/introduction.md: -------------------------------------------------------------------------------- 1 | # **Introduction to PARL's Parallel Core: XPARL** 2 | 3 | As the problems that reinforcement learning can solve become more complex, algorithms demand ever more training data. To speed up training, PARL draws on the design philosophy of [Pyro4](https://github.com/irmen/Pyro4) and provides a more efficient and practical parallelization interface. 4 | 5 | ### Easy to use 6 | With a single simple decorator, `@parl.remote_class`, users can implement parallel computation easily, without worrying about tedious multiprocess or network communication, and without being limited by Python's multithreading GIL. 7 | 8 | ### High performance 9 | `@parl.remote_class` enables truly concurrent multi-threaded computation (comparable to C++ multithreading). As shown in figure (a) below, Python's native multithreading scales poorly (because of the global interpreter lock), whereas PARL's parallelization reduces the running time linearly and thus improves concurrency. 10 | 11 | ### Web-based cluster monitoring 12 | For multi-machine parallel computation, PARL starts a web service together with the cluster. On this page, users can check the memory and CPU usage of each machine, as well as how many cluster resources each task occupies. 13 | 14 | ### Compatible with all frameworks 15 | PARL's parallelization is compatible with any deep learning framework on the market, such as tensorflow, pytorch, and mxnet. By adding the decorator `@parl.remote_class`, users can turn their existing code into parallel code. 16 | 17 | 18 | # Why PARL 19 | 20 | ## High throughput and high concurrency 21 | PARL implements the underlying parallel computation through end-to-end network transmission; that is, there is no extra network overhead while running concurrent tasks. Compared with RLlib, which relays data through Redis, PARL achieves higher data throughput in the same amount of time. In our earlier comparative experiments, running the same IMPALA algorithm on the same machines, PARL's parallel performance was superior. 22 | 23 | 24 | ## Automatic distribution of local files 25 | Most parallel frameworks on the market require users to synchronize files manually before parallel code can run; for example, configuration files have to be distributed to different machines by hand or via commands. PARL automatically distributes the code files in the current directory, enabling seamless multi-machine parallelism. 26 | 27 | <img src="../../parallel_training/comparison.png" width="1000"/> 28 |
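A minimal sketch of the decorator described above (the master address below is an assumption; replace it with your own cluster's address):

```python
import parl

@parl.remote_class
class Worker(object):
    def busy_work(self, n):
        # a CPU-heavy loop; it executes on the cluster,
        # so it is not limited by the GIL of the local Python process
        count = 0
        for i in range(n):
            count += i
        return count

parl.connect("localhost:8037")  # assumed address of a started xparl master
worker = Worker()               # the instance is created on the cluster
print(worker.busy_work(10 ** 7))
```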
-------------------------------------------------------------------------------- /docs/zh_CN/xparl/serialize.md: -------------------------------------------------------------------------------- 1 | # **Serialization/Deserialization Acceleration (Optional)** 2 | 3 | By default, PARL uses the cloudpickle library to serialize and deserialize data (data is transmitted in xparl as serialized byte streams). If the pyarrow library is installed in the Python environment, pyarrow is used for serialization and deserialization instead (since pyarrow's compatibility is not good enough, PARL does not install it by default). 4 | 5 | pyarrow and cloudpickle perform differently in different data scenarios, so users can decide whether to install pyarrow based on their own use cases. In general, the serialization protocol built into Python 3.8+ already satisfies the needs of most scenarios. 6 | 7 | ### Performance comparison 8 | For reference, here are the average serialization and deserialization times of pyarrow and cloudpickle on different data: 9 | 10 | - Test data 1: `data = [np.random.RandomState(0).randn(50, 50)] * 10` 11 | - Test data 2: `data = [np.random.RandomState(0).randn(500, 500)] * 10` 12 | - Test data 3: `data = [np.random.RandomState(0).randn(5000, 5000)] * 10` 13 | - Test data 4: `data = np.random.RandomState(0).randn(5000, 50000)` 14 | 15 | > pyarrow versions: pyarrow==0.16.0 for python2, pyarrow==2.0.0 for python3 16 | 17 | <img src="./.images/dataset1.png" width="400"/> <img src="./.images/dataset2.png" width="400"/> 18 | <img src="./.images/dataset3.png" width="400"/> <img src="./.images/dataset4.png" width="400"/> 19 | 20 | ### Conclusions 21 | - When serializing/deserializing **very large Numpy matrices**, pyarrow clearly outperforms cloudpickle 22 | - Using Python 3.8+ also improves serialization performance (mainly because Python 3.8+ upgraded pickle to support protocol=5) 23 |
-------------------------------------------------------------------------------- /evo_kit/DeepES.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/evo_kit/DeepES.gif
-------------------------------------------------------------------------------- /evo_kit/README.md: -------------------------------------------------------------------------------- 1 | # EvoKit 2 | EvoKit is an evolutionary-algorithm library that integrates multiple evolution strategies and is compatible with several inference frameworks, focusing on fast online deployment and validation. 3 | <p align="center"> 4 | <img src="DeepES.gif" alt="PARL" width="500"/> 5 | </p> 6 | 7 | ## Usage example 8 | ```c++ 9 | // instantiate an agent; the config file specifies the model, the sampling method (Gaussian\CMA sampling..), the update method (SGD\Adam), etc. 10 | auto agent = ESAgent(config); 11 | 12 | std::vector<SamplingInfo> noisy_info; 13 | std::vector<float> noisy_rewards; 14 | for (int i = 0; i < 10; ++i) { 15 | auto sampling_agent = agent->clone(); // clone a sampling agent 16 | SamplingInfo info; 17 | sampling_agent->add_noise(info); // perturb the parameters and record the random seed in info 18 | int reward = evaluate(env, sampling_agent); // evaluate the perturbed parameters 19 | noisy_info.push_back(info); // record the seed of the random noise 20 | noisy_rewards.push_back(reward); // record the evaluation result 21 | } 22 | // update the parameters with the evaluation results and random seeds, then repeat the procedure above until convergence. 23 | agent->update(noisy_info, noisy_rewards); 24 | ``` 25 | 26 | ## Run the demo in one step 27 | - sh ./scripts/build.sh 28 | 29 | ## Dependencies: 30 | - Protobuf2 31 | - OpenMP 32 | - [gflags](https://github.com/gflags/gflags/blob/master/INSTALL.md) 33 | - [glog](https://github.com/google/glog) 34 | 35 | ## Optional dependencies: 36 | 37 | ### Using PaddleLite 38 | Download the prebuilt X86 PaddleLite library, or compile the PaddleLite source code, to obtain the inference_lite_lib folder and place it in the current directory. (See: [deploying X86 inference with PaddleLite](https://paddle-lite.readthedocs.io/zh/latest/demo_guides/x86.html)) 39 | 40 | ### Using torch 41 | Download [libtorch](https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip) or compile the torch source code to obtain the libtorch folder, and place it in the current directory. 42 |
-------------------------------------------------------------------------------- /evo_kit/core/include/evo_kit/optimizer_factory.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef EVO_KIT_OPTIMIZER_FACTORY_H 16 | #define EVO_KIT_OPTIMIZER_FACTORY_H 17 | 18 | #include <algorithm> 19 | #include <glog/logging.h> 20 | #include <memory> 21 | #include "evo_kit/adam_optimizer.h" 22 | #include "evo_kit/evo_kit.pb.h" 23 | #include "evo_kit/optimizer.h" 24 | #include "evo_kit/sgd_optimizer.h" 25 | 26 | namespace evo_kit { 27 | /* @brief: create an optimizer according to the configuration 28 | * @args: 29 | * config: configuration for the optimizer 30 | * 31 | */ 32 | std::shared_ptr<Optimizer> create_optimizer(const OptimizerConfig& optimizer_config); 33 | 34 | } // namespace 35 | 36 | #endif 37 |
-------------------------------------------------------------------------------- /evo_kit/core/include/evo_kit/sampling_factory.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #ifndef EVO_KIT_SAMPLING_FACTORY_H 16 | #define EVO_KIT_SAMPLING_FACTORY_H 17 | 18 | #include <algorithm> 19 | #include <glog/logging.h> 20 | #include <memory> 21 | #include "evo_kit/cached_gaussian_sampling.h" 22 | #include "evo_kit/evo_kit.pb.h" 23 | #include "evo_kit/gaussian_sampling.h" 24 | #include "evo_kit/sampling_method.h" 25 | 26 | namespace evo_kit { 27 | /* @brief: create a sampling_method according to the configuration 28 | * @args: 29 | * config: configuration for the EvoKit 30 | * 31 | */ 32 | std::shared_ptr<SamplingMethod> create_sampling_method(const EvoKitConfig& Config); 33 | 34 | } // namespace 35 | 36 | #endif 37 |
-------------------------------------------------------------------------------- /evo_kit/core/src/sampling_factory.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "evo_kit/sampling_factory.h" 16 | 17 | namespace evo_kit { 18 | 19 | 20 | std::shared_ptr<SamplingMethod> create_sampling_method(const EvoKitConfig& config) { 21 | std::shared_ptr<SamplingMethod> sampling_method; 22 | bool cached = config.gaussian_sampling().cached(); 23 | 24 | if (cached) { 25 | sampling_method = std::make_shared<CachedGaussianSampling>(); 26 | } else { 27 | sampling_method = std::make_shared<GaussianSampling>(); 28 | } 29 | 30 | bool success = sampling_method->load_config(config); 31 | 32 | if (success) { 33 | return sampling_method; 34 | } else { 35 | LOG(ERROR) << "[EvoKit] Failed to create sampling_method"; 36 | return nullptr; 37 | } 38 | 39 | } 40 | 41 | }//namespace 42 |
-------------------------------------------------------------------------------- /evo_kit/demo/cartpole_config.prototxt: -------------------------------------------------------------------------------- 1 | seed: 1024 2 | gaussian_sampling { 3 | std: 0.5 4 | cached: true 5 | cache_size: 100000 6 | } 7 | optimizer { 8 | type: "Adam" 9 | base_lr: 0.05 10 | momentum: 0.9 11 | beta1: 0.9 12 | beta2: 0.999 13 | epsilon: 1e-08 14 | } 15 | async_es { 16 | model_iter_id: 0 17 | } 18 |
-------------------------------------------------------------------------------- /evo_kit/demo/paddle/cartpole_init_model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/evo_kit/demo/paddle/cartpole_init_model.zip
-------------------------------------------------------------------------------- /evo_kit/demo/torch/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 2.6) 2 | project (EvoKit_demo) 3 | set(TARGET parallel_main) 4 | 5 | set(CMAKE_CXX_STANDARD 11) 6 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 7 | set(CMAKE_CXX_EXTENSIONS OFF) 8 | 9 | find_package(OpenMP) 10 | if (OPENMP_FOUND)
11 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 12 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 13 | set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") 14 | endif() 15 | 16 | list(APPEND CMAKE_PREFIX_PATH "./libtorch") 17 | find_package(Torch REQUIRED ON) 18 | set(demo "${PROJECT_SOURCE_DIR}/cartpole_solver_parallel.cc") 19 | 20 | 21 | ########## main ########## 22 | add_executable(${TARGET} ${demo} ${framework_src}) 23 | target_link_libraries(${TARGET} gflags protobuf pthread glog) 24 | 25 | ########## Torch libraries ########## 26 | target_link_libraries(${TARGET} "${TORCH_LIBRARIES}") 27 | 28 | 29 | ########## EvoKit libraries ########## 30 | list(APPEND CMAKE_PREFIX_PATH "./libevokit/cmake/Torch") 31 | find_package(EvoKit) 32 | target_link_libraries(${TARGET} "${EVOKIT_LIBRARY}") 33 |
-------------------------------------------------------------------------------- /evo_kit/scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sh scripts/lib_install.sh torch 4 | 5 | cd demo/torch 6 | 7 | #---------------libtorch-------------# 8 | if [ ! -d "./libtorch" ];then 9 | echo "Cannot find the torch library: ./libtorch" 10 | echo "Downloading Torch library" 11 | wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip 12 | unzip -q libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip 13 | rm -rf libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip 14 | echo "Torch library Downloaded" 15 | fi 16 | 17 | 18 | #---------------libevokit-------------# 19 | cp -r ../../libevokit ./ 20 | if [ ! -d "./libevokit" ];then 21 | echo "Cannot find the EvoKit library: ./libevokit" 22 | echo "Please put the EvoKit library in the current folder according to the instructions in the README" # TODO: readme 23 | exit 1 24 | fi 25 | 26 | # proto 27 | cp ../cartpole_config.prototxt ./ 28 | 29 | #----------------build---------------# 30 | rm -rf build 31 | mkdir build 32 | cd build 33 | cmake ../ 34 | make -j10 35 | cd - 36 | 37 | #-----------------run----------------# 38 | ./build/parallel_main 39 | 40 | 41 | cd ../.. 42 |
-------------------------------------------------------------------------------- /evo_kit/scripts/lib_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# != 1 ]; then 4 | echo "You must choose one framework (paddle/torch) to compile EvoKit." 5 | exit 0 6 | fi 7 | 8 | if [ $1 = "paddle" ]; then 9 | #---------------paddlelite-------------# 10 | if [ ! -d "./inference_lite_lib" ];then 11 | echo "Cannot find the PaddleLite library: ./inference_lite_lib" 12 | echo "Please put the PaddleLite library in the current folder according to the instructions in the README" 13 | exit 1 14 | fi 15 | 16 | # Initialization model 17 | if [ ! -d ./demo/paddle/cartpole_init_model ]; then 18 | unzip ./demo/paddle/cartpole_init_model.zip -d ./demo/paddle/ 19 | fi 20 | 21 | FLAGS=" -DWITH_PADDLE=ON" 22 | elif [ $1 = "torch" ]; then 23 | FLAGS=" -DWITH_TORCH=ON" 24 | else 25 | echo "Invalid arguments. [paddle/torch]" 26 | exit 0 27 | fi 28 | 29 | 30 | #----------------protobuf-------------# 31 | cd core/proto/ 32 | protoc evo_kit/evo_kit.proto --cpp_out .
33 | cd - 34 | 35 | #----------------build---------------# 36 | echo ${FLAGS} 37 | rm -rf build 38 | mkdir build 39 | cd build 40 | cmake ../ ${FLAGS} 41 | make -j10 42 | make install 43 | cd - 44 |
-------------------------------------------------------------------------------- /evo_kit/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 2.6) 2 | project (EvoKit_demo) 3 | set(TARGET unit_test_main) 4 | 5 | set(CMAKE_CXX_STANDARD 11) 6 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 7 | set(CMAKE_CXX_EXTENSIONS OFF) 8 | 9 | find_package(GTest REQUIRED) 10 | find_package(OpenMP) 11 | if (OPENMP_FOUND) 12 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 13 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 14 | set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") 15 | endif() 16 | 17 | # Torch lib 18 | list(APPEND CMAKE_PREFIX_PATH "../libtorch") 19 | find_package(Torch REQUIRED ON) 20 | 21 | # include and source 22 | include_directories("${PROJECT_SOURCE_DIR}/include") 23 | file(GLOB test_src "${PROJECT_SOURCE_DIR}/src/*.cc") 24 | 25 | # make 26 | add_executable(${TARGET} "unit_test.cc" ${core_src} ${agent_src} ${test_src}) 27 | target_link_libraries(${TARGET} gflags protobuf pthread glog gtest "${TORCH_LIBRARIES}") 28 | 29 | 30 | ########## EvoKit libraries ########## 31 | list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/libevokit/cmake/Torch") 32 | find_package(EvoKit) 33 | target_link_libraries(${TARGET} "${EVOKIT_LIBRARY}") 34 |
-------------------------------------------------------------------------------- /evo_kit/test/prototxt/torch_sin_cached_config.prototxt: -------------------------------------------------------------------------------- 1 | seed : 1024 2 | 3 | gaussian_sampling { 4 | std: 0.005 5 | cached: true 6 | cache_size : 100000 7 | } 8 | 9 | optimizer { 10 | type: "Adam", 11 | base_lr: 0.005, 12 | momentum: 0.9, 13 | beta1: 0.9, 14 | beta2: 0.999, 15 | epsilon: 1e-8, 16 | } 17 |
-------------------------------------------------------------------------------- /evo_kit/test/prototxt/torch_sin_config.prototxt: -------------------------------------------------------------------------------- 1 | seed : 1024 2 | 3 | gaussian_sampling { 4 | std: 0.005 5 | cached: false 6 | } 7 | 8 | optimizer { 9 | type: "Adam", 10 | base_lr: 0.005, 11 | momentum: 0.9, 12 | beta1: 0.9, 13 | beta2: 0.999, 14 | epsilon: 1e-8, 15 | } 16 |
-------------------------------------------------------------------------------- /evo_kit/test/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH 3 | 4 | #---------------libtorch-------------# 5 | if [ ! -d "./libtorch" ];then 6 | echo "Cannot find the torch library: ./libtorch" 7 | echo "Downloading Torch library" 8 | wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip 9 | unzip -q libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip 10 | rm -rf libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip 11 | echo "Torch library Downloaded" 12 | fi 13 | 14 | #----------------protobuf-------------# 15 | cd core/proto/ 16 | protoc evo_kit/evo_kit.proto --cpp_out . 17 | cd - 18 | 19 | #----------------build---------------# 20 | sh scripts/lib_install.sh torch 21 | 22 | #----------------build test---------------# 23 | cd test 24 | 25 | cp -r ../libevokit ./
26 | if [ ! -d "./libevokit" ];then 27 | echo "Cannot find the EvoKit library: ./libevokit" 28 | echo "Please put the EvoKit library in the current folder according to the instructions in the README" # TODO: readme 29 | exit 1 30 | fi 31 | 32 | rm -rf build 33 | mkdir build 34 | cd build 35 | cmake ../ 36 | make -j10 37 | 38 | #-----------------run----------------# 39 | ./unit_test_main 40 | 41 | cd .. 42 |
-------------------------------------------------------------------------------- /evo_kit/test/src/utils_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "gtest/gtest.h" 16 | #include <vector> 17 | #include "evo_kit/utils.h" 18 | 19 | namespace evo_kit { 20 | 21 | // Tests the compute_centered_ranks() method. 22 | TEST(UtilsTest, Method_compute_centered_ranks) { 23 | float a[5] = {9.0, 8.0, 7.0, 6.0, 5.0}; 24 | std::vector<float> reward_vec(a, a+5); 25 | EXPECT_EQ(compute_centered_ranks(reward_vec), true); 26 | } 27 | 28 | 29 | } // namespace 30 | 31 |
-------------------------------------------------------------------------------- /evo_kit/test/unit_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #include "gtest/gtest.h" 16 | 17 | int main(int argc, char **argv) { 18 | ::testing::InitGoogleTest(&argc, argv); 19 | return RUN_ALL_TESTS(); 20 | } 21 | -------------------------------------------------------------------------------- /examples/A2C/.result/result_a2c_paddle0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/A2C/.result/result_a2c_paddle0.png -------------------------------------------------------------------------------- /examples/A2C/.result/result_a2c_paddle1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/A2C/.result/result_a2c_paddle1.png -------------------------------------------------------------------------------- /examples/A2C/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.12.1 2 | atari-py==0.1.7 3 | opencv-python 4 | parl>=1.4.3 5 | paddlepaddle 6 | -------------------------------------------------------------------------------- /examples/AlphaZero/.pic/good_moves_rate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/AlphaZero/.pic/good_moves_rate.png -------------------------------------------------------------------------------- /examples/AlphaZero/.pic/perfect_moves_rate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/AlphaZero/.pic/perfect_moves_rate.png -------------------------------------------------------------------------------- /examples/AlphaZero/requirements.txt: -------------------------------------------------------------------------------- 1 | parl==1.4.3 2 | tqdm 3 | paddlepaddle==2.0.0 4 | -------------------------------------------------------------------------------- /examples/Baselines/GridDispatch_competition/README.md: -------------------------------------------------------------------------------- 1 | ## Baselines for grid dispatching competition 2 | 3 | Competition link: [国家电网调控AI创新大赛:电网运行组织智能安排](https://aistudio.baidu.com/aistudio/competition/detail/111) 4 | 5 | We provide a distributed SAC baseline based on PARL with paddlepaddle or torch: 6 | - [paddlepaddle baseline](paddle) 7 | - [torch baseline](torch) 8 | -------------------------------------------------------------------------------- /examples/Baselines/Halite_competition/paddle/encode_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import base64 16 | import pickle 17 | import paddle 18 | 19 | if __name__ == '__main__': 20 | 21 | model = paddle.load('./model/latest_ship_model.pth') 22 | actor = model['actor'] 23 | 24 | for name, param in actor.items(): 25 | actor[name] = param.numpy() 26 | 27 | model_byte = base64.b64encode(pickle.dumps(actor)) 28 | with open('./model/actor.txt', 'wb') as f: 29 | f.write(model_byte) 30 | -------------------------------------------------------------------------------- /examples/Baselines/Halite_competition/paddle/model/latest_ship_model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/Baselines/Halite_competition/paddle/model/latest_ship_model.pth -------------------------------------------------------------------------------- /examples/Baselines/Halite_competition/paddle/requirements.txt: -------------------------------------------------------------------------------- 1 | parl>=2.0.0 2 | paddlepaddle>=2.0.0 3 | -------------------------------------------------------------------------------- /examples/Baselines/Halite_competition/torch/encode_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import base64 16 | import pickle 17 | import torch 18 | 19 | if __name__ == '__main__': 20 | 21 | model = torch.load('./model/latest_ship_model.pth') 22 | actor = model["actor"] 23 | 24 | for name, param in actor.items(): 25 | actor[name] = param.numpy() 26 | 27 | model_byte = base64.b64encode(pickle.dumps(actor)) 28 | with open('./model/actor.txt', 'wb') as f: 29 | f.write(model_byte) 30 |
-------------------------------------------------------------------------------- /examples/Baselines/Halite_competition/torch/model/latest_ship_model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/Baselines/Halite_competition/torch/model/latest_ship_model.pth
-------------------------------------------------------------------------------- /examples/Baselines/Halite_competition/torch/requirements.txt: -------------------------------------------------------------------------------- 1 | parl>=2.0.0 2 | torch>=1.6.0 3 |
-------------------------------------------------------------------------------- /examples/CARLA_SAC/.benchmark/Lane_bend.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/CARLA_SAC/.benchmark/Lane_bend.gif
-------------------------------------------------------------------------------- /examples/CARLA_SAC/.benchmark/carla_sac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/CARLA_SAC/.benchmark/carla_sac.png
-------------------------------------------------------------------------------- /examples/CARLA_SAC/model.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/CARLA_SAC/model.ckpt
-------------------------------------------------------------------------------- /examples/CQL/requirements.txt: -------------------------------------------------------------------------------- 1 | paddlepaddle>=2.0.0 2 | gym==0.20.0 3 | parl>=2.0.0 4 | mujoco_py==2.0.2.8 5 | git+https://github.com/rail-berkeley/d4rl@master#egg=d4rl 6 |
-------------------------------------------------------------------------------- /examples/DDPG/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce DDPG with PARL 2 | Based on PARL, we have reproduced the DDPG deep reinforcement learning algorithm, matching the performance reported in the paper on Mujoco benchmarks. 3 | 4 | > Paper: DDPG in [Continuous control with deep reinforcement learning](https://arxiv.org/abs/1509.02971) 5 | 6 | ### Mujoco games introduction 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco).
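A quick way to check that the environment is available locally (a sanity check only; with gym>=0.26, `reset` returns an `(obs, info)` pair):

```python
import gym

env = gym.make('HalfCheetah-v4')
obs, info = env.reset()
print(obs.shape)  # the observation vector of HalfCheetah
```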
8 | 9 | ### Benchmark result 10 | 11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/DDPG/paddle/result.png" width="600" alt="DDPG_results"/> 12 | + Each experiment was run three times with different seeds 13 | 14 | ## How to use 15 | ### Dependencies: 16 | + python3.7+ 17 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 18 | + [paddlepaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle) 19 | + gym>=0.26.0 20 | + mujoco>=2.2.2 21 | 22 | ### Start Training: 23 | ``` 24 | # To train an agent for the HalfCheetah-v4 game 25 | python train.py 26 | 27 | # To train for other games 28 | python train.py --env [ENV_NAME] 29 | ```
-------------------------------------------------------------------------------- /examples/DDPG/requirements.txt: -------------------------------------------------------------------------------- 1 | parl>=2.1.1 2 | paddlepaddle>=2.0.0 3 | gym>=0.26.0 4 | mujoco>=2.2.2 5 |
-------------------------------------------------------------------------------- /examples/DQN/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce DQN with PARL 2 | Based on PARL, we provide a simple example of the DQN algorithm. 3 | 4 | + Paper: DQN in [Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html) 5 | 6 | ### Result 7 | 8 | Performance of DQN playing CartPole-v0 9 | 10 | <p align="left"> 11 | <img src="../QuickStart/performance.gif" alt="result" height="175"/> 12 | <img src="cartpole.jpg" alt="result" height="175"/> 13 | </p> 14 | 15 | ## How to use 16 | ### Dependencies: 17 | + [paddlepaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle) 18 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 19 | + gym 20 | + pygame 21 | 22 | 23 | ### Start Training: 24 | ``` 25 | # To train a model for the CartPole-v0 game 26 | python train.py 27 | ``` 28 | 29 | ## DQN-Variants 30 | 31 | For DQN variants such as Double DQN and Dueling DQN, please check [here](https://github.com/PaddlePaddle/PARL/tree/develop/examples/DQN_variant) 32 |
-------------------------------------------------------------------------------- /examples/DQN/cartpole.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/DQN/cartpole.jpg
-------------------------------------------------------------------------------- /examples/DQN/requirements.txt: -------------------------------------------------------------------------------- 1 | gym 2 | pygame 3 | parl>=2.1.1 4 | paddlepaddle>=2.0.0 5 |
-------------------------------------------------------------------------------- /examples/DQN_variant/.benchmark/Dueling DQN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/DQN_variant/.benchmark/Dueling DQN.png
-------------------------------------------------------------------------------- /examples/DQN_variant/requirements.txt: -------------------------------------------------------------------------------- 1 | paddlepaddle>=2.0.0 2 | parl>=2.1.1 3 | gym==0.18.0 4 | tqdm 5 | opencv-python 6 | atari-py==0.2.6 7 |
-------------------------------------------------------------------------------- /examples/ES/es.py: --------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import parl 16 | 17 | __all__ = ['ES'] 18 | 19 | 20 | class ES(parl.Algorithm): 21 | def __init__(self, model): 22 | """ES algorithm. 23 | 24 | Since the parameters of the model are updated at the numpy level, a `learn` function is not needed 25 | in this algorithm. 26 | 27 | Args: 28 | model(`parl.Model`): policy model of the ES algorithm. 29 | """ 30 | self.model = model 31 | 32 | def predict(self, obs): 33 | """Use the policy model to predict actions for observations. 34 | 35 | Args: 36 | obs: tensor of observations. 37 | 38 | Returns: 39 | tensor of predicted actions. 40 | """ 41 | 42 | return self.model(obs) 43 |
-------------------------------------------------------------------------------- /examples/ES/es_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | config = { 16 | #========== remote config ========== 17 | 'master_address': 'localhost:8837', 18 | #========== env config ========== 19 | 'env_name': 'HalfCheetah-v4', 20 | #========== actor config ========== 21 | 'actor_num': 24, 22 | 'action_noise_std': 0.01, 23 | 'min_task_runtime': 0.2, 24 | 'eval_prob': 0.003, 25 | 'filter_update_prob': 0.01, 26 | 27 | #========== learner config ========== 28 | 'stepsize': 0.01, 29 | 'train_steps': 200, 30 | 'min_episodes_per_batch': 1000, 31 | 'min_steps_per_batch': 10000, 32 | 'noise_size': 200000000, 33 | 'noise_stdev': 0.02, 34 | 'l2_coeff': 0.005, 35 | 'report_window_size': 10, 36 | } 37 |
-------------------------------------------------------------------------------- /examples/ES/noise.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code is copied or modified from: 4 | # https://github.com/ray-project/ray/blob/master/python/ray/rllib/utils/filter.py 5 | 6 | import numpy as np 7 | 8 | 9 | class SharedNoiseTable(object): 10 | """Shared noise table used by the learner and actors. 11 | 12 | The learner and actors create the same noise table by passing the same seed. 13 | With the same noise table, the learner and actors can communicate noises by 14 | index into the noise table instead of passing numpy arrays of noises.
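    A minimal usage sketch (the sizes below are illustrative only):

        noise_table = SharedNoiseTable(noise_size=10 ** 8)
        idx = noise_table.sample_index(dim=100)  # actor: sample a noise index
        noise = noise_table.get(idx, 100)        # learner: recover the same noise by index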
15 | """ 16 | 17 | def __init__(self, noise_size, seed=1024): 18 | self.noise_size = noise_size 19 | self.seed = seed 20 | self.noise = self._create_noise() 21 | 22 | def _create_noise(self): 23 | noise = np.random.RandomState(self.seed).randn(self.noise_size).astype( 24 | np.float32) 25 | return noise 26 | 27 | def get(self, i, dim): 28 | return self.noise[i:i + dim] 29 | 30 | def sample_index(self, dim): 31 | return np.random.randint(0, len(self.noise) - dim + 1) 32 | -------------------------------------------------------------------------------- /examples/ES/requirements.txt: -------------------------------------------------------------------------------- 1 | paddlepaddle>=2.0.0 2 | parl>=2.1.1 3 | gym>=0.26.0 4 | mujoco>=2.2.2 5 | -------------------------------------------------------------------------------- /examples/IMPALA/requirements.txt: -------------------------------------------------------------------------------- 1 | paddlepaddle>=2.0.0 2 | parl>=2.1.1 3 | gym==0.12.1 4 | atari-py==0.1.7 5 | opencv-python 6 | -------------------------------------------------------------------------------- /examples/MADDPG/requirements.txt: -------------------------------------------------------------------------------- 1 | PettingZoo==1.17.0 2 | gym==0.23.1 3 | parl>=2.1.1 4 | paddlepaddle 5 | -------------------------------------------------------------------------------- /examples/NeurIPS2018-AI-for-Prosthetics-Challenge/final_submit/pelvisBasedObs_scaler.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/final_submit/pelvisBasedObs_scaler.npz -------------------------------------------------------------------------------- /examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/competition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/competition.png -------------------------------------------------------------------------------- /examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/curriculum-learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/curriculum-learning.png -------------------------------------------------------------------------------- /examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/demo.gif -------------------------------------------------------------------------------- /examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/fastest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/fastest.png -------------------------------------------------------------------------------- /examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/last course.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/last course.png -------------------------------------------------------------------------------- /examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/velocity_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/image/velocity_distribution.png -------------------------------------------------------------------------------- /examples/NeurIPS2018-AI-for-Prosthetics-Challenge/pelvisBasedObs_scaler.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2018-AI-for-Prosthetics-Challenge/pelvisBasedObs_scaler.npz -------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/final_submit/official_obs_scaler.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2019-Learn-to-Move-Challenge/final_submit/official_obs_scaler.npz -------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/image/performance.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2019-Learn-to-Move-Challenge/image/performance.gif -------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/official_obs_scaler.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2019-Learn-to-Move-Challenge/official_obs_scaler.npz -------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/replay_memory.py: -------------------------------------------------------------------------------- 1 | ../NeurIPS2018-AI-for-Prosthetics-Challenge/replay_memory.py -------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/eval_difficulty1.sh: -------------------------------------------------------------------------------- 1 | # use which GPU 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | python evaluate.py --actor_num 160 \ 5 | --difficulty 1 \ 6 | --penalty_coeff 3.0 \ 7 | --saved_models_dir ./output/difficulty1/model_every_100_episodes \ 8 | --evaluate_times 300 9 | -------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/eval_difficulty2.sh: -------------------------------------------------------------------------------- 1 | # use which GPU 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | 5 | python evaluate.py --actor_num 160 \ 6 | --difficulty 2 \ 7 | --penalty_coeff 5.0 \ 8 | --saved_models_dir ./output/difficulty2/model_every_100_episodes \ 9 | --evaluate_times 300 10 | 
-------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/eval_difficulty3.sh: -------------------------------------------------------------------------------- 1 | # use which GPU 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | python evaluate.py --actor_num 160 \ 5 | --difficulty 3 \ 6 | --vel_penalty_coeff 3.0 \ 7 | --penalty_coeff 2.0 \ 8 | --saved_models_dir ./output/difficulty3/model_every_100_episodes \ 9 | --evaluate_times 300 10 | -------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/eval_difficulty3_first_target.sh: -------------------------------------------------------------------------------- 1 | # use which GPU 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | python evaluate.py --actor_num 160 \ 5 | --difficulty 3 \ 6 | --vel_penalty_coeff 3.0 \ 7 | --penalty_coeff 3.0 \ 8 | --only_first_target \ 9 | --saved_models_dir ./output/difficulty3_first_target/model_every_100_episodes \ 10 | --evaluate_times 300 11 | -------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/train_difficulty1.sh: -------------------------------------------------------------------------------- 1 | echo `which python` 2 | if [ $# != 1 ]; then 3 | echo "Usage: sh train_difficulty1.sh [RESTORE_MODEL_PATH]" 4 | exit 0 5 | fi 6 | 7 | # use which GPU 8 | export CUDA_VISIBLE_DEVICES=0 9 | 10 | python train.py --actor_num 300 \ 11 | --difficulty 1 \ 12 | --penalty_coeff 3.0 \ 13 | --logdir ./output/difficulty1 \ 14 | --restore_model_path $1 15 | -------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/train_difficulty2.sh: -------------------------------------------------------------------------------- 1 | if [ $# != 1 ]; then 2 | echo "Usage: sh train_difficulty2.sh [RESTORE_MODEL_PATH]" 3 | exit 0 4 | fi 5 | 6 | # use which GPU 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | python train.py --actor_num 300 \ 10 | --difficulty 2 \ 11 | --penalty_coeff 5.0 \ 12 | --logdir ./output/difficulty2 \ 13 | --restore_model_path $1 14 | -------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/train_difficulty3.sh: -------------------------------------------------------------------------------- 1 | if [ $# != 1 ]; then 2 | echo "Usage: sh train_difficulty3.sh [RESTORE_MODEL_PATH]" 3 | exit 0 4 | fi 5 | 6 | # use which GPU 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | python train.py --actor_num 300 \ 10 | --difficulty 3 \ 11 | --vel_penalty_coeff 3.0 \ 12 | --penalty_coeff 2.0 \ 13 | --rpm_size 6e6 \ 14 | --train_times 250 \ 15 | --logdir ./output/difficulty3 \ 16 | --restore_model_path $1 17 | -------------------------------------------------------------------------------- /examples/NeurIPS2019-Learn-to-Move-Challenge/scripts/train_difficulty3_first_target.sh: -------------------------------------------------------------------------------- 1 | if [ $# != 1 ]; then 2 | echo "Usage: sh train_difficulty3_first_target.sh [RESTORE_MODEL_PATH]" 3 | exit 0 4 | fi 5 | 6 | # use which GPU 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | python train.py --actor_num 300 \ 10 | --difficulty 3 \ 11 | --vel_penalty_coeff 3.0 \ 12 | --penalty_coeff 3.0 \ 13 | --only_first_target \ 14 | --logdir ./output/difficulty3_first_target \ 15 | --restore_model_path $1 16 | 
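The shell scripts above only forward flags to train.py and evaluate.py. As a rough illustration of how train.py presumably consumes them — the argument names are taken from the scripts above, while the types and defaults here are assumptions, not the repo's actual values:

```python
# Hypothetical argument parser mirroring the flags used by the scripts above.
# The real parser lives in the repo's train.py; types/defaults here are guesses.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--actor_num', type=int, default=300)         # parallel actors
parser.add_argument('--difficulty', type=int, default=1)          # environment difficulty
parser.add_argument('--penalty_coeff', type=float, default=3.0)
parser.add_argument('--vel_penalty_coeff', type=float, default=0.0)
parser.add_argument('--rpm_size', type=float, default=2e6)        # replay memory size
parser.add_argument('--train_times', type=int, default=100)
parser.add_argument('--only_first_target', action='store_true')
parser.add_argument('--logdir', type=str, default='./output')
parser.add_argument('--restore_model_path', type=str, default='')
args = parser.parse_args()
```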
-------------------------------------------------------------------------------- /examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/README.md: -------------------------------------------------------------------------------- 1 | ## NeurIPS2020 L2RPN Challenge 2 | 3 | The **PARL** team won first place in both tracks (the Robustness Track and the Adaptability Track) of the *NeurIPS2020 Learning-to-Run-a-Power-Network* challenge! 4 | 5 | <p align="center"> 6 | <img src="images/l2rpn.jpeg" alt="PARL" height="300" /> 7 | </p> 8 | 9 | ## Paper Citation 10 | 11 | If you use our code in your experiments or find it helpful, please consider citing the following paper: 12 | 13 | <pre> 14 | @inproceedings{Zhou2021ActionSB, 15 | title={Action Set Based Policy Optimization for Safe Power Grid Management}, 16 | author={Bo Zhou and Hongsheng Zeng and Yuecheng Liu and Kejiao Li and Fan Wang and Hao Tian}, 17 | booktitle={ECML PKDD 2021}, 18 | year={2021} 19 | } 20 | </pre> 21 | -------------------------------------------------------------------------------- /examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/images/l2rpn.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/images/l2rpn.jpeg -------------------------------------------------------------------------------- /examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/track1/README.md: -------------------------------------------------------------------------------- 1 | ## Dependencies 2 | - python3.6 3 | - [parl==1.3.2](https://github.com/PaddlePaddle/PARL) 4 | - [paddlepaddle==1.6.1](https://github.com/PaddlePaddle/Paddle) 5 | - [grid2op==1.2.2](https://github.com/rte-france/Grid2Op) 6 | - [lightsim2grid==0.2.4](https://github.com/BDonnot/lightsim2grid) 7 | 8 | ## How to evaluate 9 | 1. Clone the repository. 10 | 2. Download the saved models from an online storage service: [Baidu Pan](https://pan.baidu.com/s/1nqrIDomycy3D4OINSQV-8w) (password: `4801`) or [Google Drive](https://drive.google.com/file/d/1hq4Xf_xywrm3I-1bJNQt_QKrOi8HJrrr/view?usp=sharing) 11 | 3. Unpack the file: 12 | ``` 13 | tar -xvzf saved_files.tar.gz 14 | ``` 15 | 4. Evaluate the result: 16 | ``` 17 | python evaluate.py --num_episodes=10 18 | ``` 19 | -------------------------------------------------------------------------------- /examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/track2/README.md: -------------------------------------------------------------------------------- 1 | ## Dependencies 2 | - python3.6 3 | - [parl==1.3.2](https://github.com/PaddlePaddle/PARL) 4 | - [paddlepaddle==1.6.1](https://github.com/PaddlePaddle/Paddle) 5 | - [grid2op==1.2.2](https://github.com/rte-france/Grid2Op) 6 | - [lightsim2grid==0.2.4](https://github.com/BDonnot/lightsim2grid) 7 | 8 | ## How to evaluate 9 | 1. Clone the repository. 10 | 2. Download the saved models from an online storage service: [Baidu Pan](https://pan.baidu.com/s/1sFR17yMUEsXtVyuomUkutw) (password: `uvi6`) or [Google Drive](https://drive.google.com/file/d/1vH52Z2DhSj5Vpk9pBhHKZk0qdqiJFXZU/view?usp=sharing) 11 | 3. Unpack the file: 12 | ``` 13 | tar -zxvf saved_files.tar.gz 14 | ``` 15 | 4.
Evaluate the result: 16 | ``` 17 | python evaluate.py --num_episodes=10 18 | ``` 19 | -------------------------------------------------------------------------------- /examples/NeurIPS2020-Learning-to-Run-a-Power-Network-Challenge/track2/es.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import parl 16 | 17 | __all__ = ['ES'] 18 | 19 | 20 | class ES(parl.Algorithm): 21 | def __init__(self, model): 22 | """ES algorithm. 23 | 24 | Since the parameters of the model are updated at the numpy level, a `learn` function is not needed 25 | in this algorithm. 26 | 27 | Args: 28 | model(`parl.Model`): policy model of the ES algorithm. 29 | """ 30 | self.model = model 31 | 32 | def predict(self, obs): 33 | """Use the policy model to predict actions for observations. 34 | 35 | Args: 36 | obs(layers.data): data layer of observations. 37 | Returns: 38 | tensor of predicted actions. 39 | """ 40 | return self.model.predict(obs) 41 | -------------------------------------------------------------------------------- /examples/OAC/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce OAC with PARL 2 | Based on PARL, we have reproduced the OAC deep reinforcement learning algorithm, matching the results reported in the paper on Mujoco benchmarks. 3 | 4 | > Paper: OAC in [Better Exploration with Optimistic Actor-Critic](https://arxiv.org/abs/1910.12807) 5 | 6 | ### Mujoco games introduction 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco).
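As a quick sanity check of this environment stack, a MuJoCo task can be created directly through gym — a minimal sketch assuming the gym>=0.26 API listed under Dependencies, using the README's Humanoid-v4 example:

```python
import gym  # assumes gym>=0.26.0 and mujoco>=2.2.2, as listed under Dependencies

env = gym.make("Humanoid-v4")
obs, info = env.reset(seed=0)  # gym>=0.26: reset() returns (obs, info)
action = env.action_space.sample()
# gym>=0.26: step() returns a 5-tuple instead of the old 4-tuple
obs, reward, terminated, truncated, info = env.step(action)
env.close()
```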
8 | 9 | ### Benchmark result 10 | 11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/OAC/paddle/result.png" width="600" alt="OAC_results"/> 12 | 13 | ## How to use 14 | ### Dependencies: 15 | + python3.7+ 16 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 17 | + [paddlepaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle) 18 | + gym>=0.26.0 19 | + mujoco>=2.2.2 20 | 21 | ### Start Training: 22 | ``` 23 | # To train an agent for the Humanoid-v4 game 24 | python train.py 25 | 26 | # To train for other games 27 | python train.py --env [ENV_NAME] 28 | ``` -------------------------------------------------------------------------------- /examples/OAC/requirements.txt: -------------------------------------------------------------------------------- 1 | parl>=2.1.1 2 | paddlepaddle>=2.0.0 3 | gym>=0.26.0 4 | mujoco>=2.2.2 5 | -------------------------------------------------------------------------------- /examples/PPO/requirements_atari.txt: -------------------------------------------------------------------------------- 1 | gym==0.18.0 2 | paddlepaddle>=2.0.0 3 | parl>=2.1.1 4 | atari-py==0.2.6 5 | opencv-python 6 | -------------------------------------------------------------------------------- /examples/PPO/requirements_mujoco.txt: -------------------------------------------------------------------------------- 1 | gym>=0.26.0 2 | mujoco==2.2.2 3 | paddlepaddle>=2.0.0 4 | parl>=2.1.1 5 | -------------------------------------------------------------------------------- /examples/QMIX/README.md: -------------------------------------------------------------------------------- 1 | ## QMIX based on PARL and PaddlePaddle2.0 2 | We reproduce QMIX based on **PARL** and **PaddlePaddle>=2.0.0**, matching the results reported in the paper on StarCraft II benchmarks. 3 | ### QMIX 4 | QMIX is a **value-based** multi-agent reinforcement learning algorithm. 5 | Learn more about QMIX from: [QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1803.11485) 6 | ### StarCraft2 Environment 7 | Paper: [The StarCraft Multi-Agent Challenge](https://arxiv.org/pdf/1902.04043) 8 | GitHub repository: [smac](https://github.com/oxwhirl/smac) 9 | ## Benchmark Results 10 | <img src="images/paddle2.0_qmix_result.png" width = "700" alt="Performance" /> 11 | 12 | - We trained our model in 5 different scenarios: *"3m", "8m", "2s_3z", "3s_5z"* and *"1c_3s_5z"*. 13 | - The **difficulty** in all scenarios is set to "7" (very difficult). 14 | - We trained our model 3 times for each scenario. 15 | 16 | ## How to Use 17 | ### Dependencies 18 | - python3.6+ 19 | - [parl>=2.0.0](https://github.com/PaddlePaddle/PARL) 20 | - [smac](https://github.com/oxwhirl/smac) 21 | - [PaddlePaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle) 22 | 23 | ### Start Training 24 | 1. Modify the config in `qmix_config.py`. 25 | 2. Start training: 26 | ```bash 27 | python train.py 28 | ``` 29 | 3.
View the training process with tensorboard: 30 | ```bash 31 | tensorboard --logdir ./ 32 | ``` 33 | -------------------------------------------------------------------------------- /examples/QMIX/images/paddle2.0_qmix_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/QMIX/images/paddle2.0_qmix_result.png -------------------------------------------------------------------------------- /examples/QMIX/requirements.txt: -------------------------------------------------------------------------------- 1 | PaddlePaddle>=2.0.0 2 | smac 3 | parl>=2.0.0 4 | -------------------------------------------------------------------------------- /examples/QuickStart/README.md: -------------------------------------------------------------------------------- 1 | ## Paddle benchmark Quick Start 2 | Train an agent with PARL to solve the CartPole problem, a classical benchmark in RL. 3 | 4 | ## How to use 5 | ### Dependencies: 6 | 7 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 8 | + paddlepaddle>=2.0.0 9 | + gym 10 | + pygame 11 | 12 | ### Start Training: 13 | ``` 14 | python train.py 15 | ``` 16 | 17 | ### Expected Result 18 | <img src="performance.gif" width = "300" height ="200" alt="result"/> 19 | 20 | The agent can get around 200 points in a few seconds. 21 | -------------------------------------------------------------------------------- /examples/QuickStart/cartpole_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import paddle 16 | import paddle.nn as nn 17 | import paddle.nn.functional as F 18 | import parl 19 | 20 | 21 | class CartpoleModel(parl.Model): 22 | """ Linear network to solve Cartpole problem. 23 | 24 | Args: 25 | obs_dim (int): Dimension of observation space. 26 | act_dim (int): Dimension of action space. 
27 | """ 28 | 29 | def __init__(self, obs_dim, act_dim): 30 | super(CartpoleModel, self).__init__() 31 | hid1_size = act_dim * 10 32 | self.fc1 = nn.Linear(obs_dim, hid1_size) 33 | self.fc2 = nn.Linear(hid1_size, act_dim) 34 | 35 | def forward(self, x): 36 | out = paddle.tanh(self.fc1(x)) 37 | prob = F.softmax(self.fc2(out), axis=-1) 38 | return prob 39 | -------------------------------------------------------------------------------- /examples/QuickStart/performance.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/examples/QuickStart/performance.gif -------------------------------------------------------------------------------- /examples/QuickStart/requirements.txt: -------------------------------------------------------------------------------- 1 | gym 2 | pygame 3 | parl>=2.1.1 4 | paddlepaddle>=2.0.0 5 | -------------------------------------------------------------------------------- /examples/SAC/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce SAC with PARL 2 | Based on PARL, the SAC algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks. 3 | 4 | > Paper: SAC in [Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor](https://arxiv.org/abs/1801.01290) 5 | 6 | ### Mujoco games introduction 7 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download binaries of Mujoco as well as install mujoco-py and get license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco) 8 | 9 | ### Benchmark result 10 | 11 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/SAC/paddle/result.png" alt="SAC_results"/> 12 | 13 | + Each experiment was run three times with different seeds 14 | 15 | ## How to use 16 | ### Dependencies: 17 | + python3.7+ 18 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 19 | + [paddlepaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle) 20 | + gym>=0.26.0 21 | + mujoco-py>=2.2.2 22 | 23 | ### Start Training: 24 | #### Train 25 | ``` 26 | # To train for HalfCheetah-v4(default),Hopper-v4,Walker2d-v4,Ant-v4 27 | # --alpha 0.2(default) 28 | python train.py --env [ENV_NAME] 29 | 30 | # To reproduce the performance of Humanoid-v4 31 | python train.py --env Humanoid-v4 --alpha 0.05 32 | -------------------------------------------------------------------------------- /examples/SAC/requirements.txt: -------------------------------------------------------------------------------- 1 | parl>=2.1.1 2 | paddlepaddle>=2.0.0 3 | gym>=0.26.0 4 | mujoco>=2.2.2 5 | -------------------------------------------------------------------------------- /examples/TD3/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce TD3 with PARL 2 | Based on PARL, the TD3 algorithm of deep reinforcement learning has been reproduced, reaching the same level of indicators as the paper in Mujoco benchmarks. 
-------------------------------------------------------------------------------- /examples/TD3/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce TD3 with PARL 2 | Based on PARL, we have reproduced the TD3 deep reinforcement learning algorithm, matching the results reported in the paper on Mujoco benchmarks. 3 | 4 | It includes the following improvements: 5 | + Clipped Double Q-learning 6 | + Target Networks and Delayed Policy Update 7 | + Target Policy Smoothing Regularization 8 | 9 | > Paper: TD3 in 10 | [Addressing Function Approximation Error in Actor-Critic Methods](https://arxiv.org/abs/1802.09477) 11 | 12 | ### Mujoco games introduction 13 | PARL currently supports the open-source version of Mujoco provided by DeepMind, so users do not need to download Mujoco binaries, install mujoco-py, or obtain a license. For more details, please visit [Mujoco](https://github.com/deepmind/mujoco). 14 | 15 | ### Benchmark result 16 | 17 | <img src="https://github.com/benchmarking-rl/PARL-experiments/blob/master/TD3/paddle/result.png" alt="TD3_results"/> 18 | + Each experiment was run three times with different seeds 19 | 20 | ## How to use 21 | ### Dependencies: 22 | + python3.7+ 23 | + [parl>=2.1.1](https://github.com/PaddlePaddle/PARL) 24 | + [paddlepaddle>=2.0.0](https://github.com/PaddlePaddle/Paddle) 25 | + gym>=0.26.0 26 | + mujoco>=2.2.2 27 | 28 | ### Start Training: 29 | ``` 30 | # To train an agent for the HalfCheetah-v4 game 31 | python train.py 32 | 33 | # To train for a different game 34 | python train.py --env [ENV_NAME] 35 | ``` -------------------------------------------------------------------------------- /examples/TD3/requirements.txt: -------------------------------------------------------------------------------- 1 | parl>=2.1.1 2 | paddlepaddle>=2.0.0 3 | gym>=0.26.0 4 | mujoco>=2.2.2 5 | -------------------------------------------------------------------------------- /examples/tutorials/README.md: -------------------------------------------------------------------------------- 1 | ## "PARL: Hands-on Introduction to Reinforcement Learning" course examples 2 | 3 | For beginners in reinforcement learning, PARL provides an [introductory course](https://aistudio.baidu.com/aistudio/course/introduce/1335) that walks through code examples of the 5 most fundamental RL algorithms (note: all course examples are written with the **static-graph framework**). 4 | 5 | ## News: 6 | + [2021.12.06] At the request of many students, we now provide **dynamic-graph framework** versions of the course code for lesson3-lesson5; see [link](parl2_dygraph/). lesson1-lesson2 do not involve neural networks, so the code in this directory can still be used. 7 | 8 | ## Course outline 9 | + 1. First impressions of reinforcement learning (RL) 10 | + RL overview and learning path 11 | + Practice: environment setup (the code in [lesson1](lesson1/gridworld.py) provides a rendering wrapper for the grid-world environment) 12 | + 2. Solving RL with tabular methods 13 | + MDPs, state values, Q-tables 14 | + Practice: [Sarsa](lesson2/sarsa), [Q-learning](lesson2/q_learning) 15 | + 3. Solving RL with neural networks 16 | + Function approximation methods 17 | + Practice: [DQN](lesson3/dqn) 18 | + 4. Solving RL with policy gradients 19 | + Policy approximation, policy gradients 20 | + Practice: [Policy Gradient](lesson4/policy_gradient) 21 | + 5. Solving RL in continuous action spaces 22 | + Practice: [DDPG](lesson5/ddpg) 23 | 24 | 25 | 26 | ## Usage 27 | 28 | ### Install dependencies (note: please install the exact versions listed) 29 | 30 | + Python 3.6/3.7 31 | + [paddlepaddle](https://github.com/PaddlePaddle/Paddle)==1.8.5 32 | + [parl](https://github.com/PaddlePaddle/PARL)==1.3.1 or parl==1.4 33 | + gym==0.18.0 34 | + atari-py==0.2.6 (only needed for the lesson4 homework) 35 | + rlschool==0.3.1 (only needed for the lesson5 homework) 36 | 37 | You can simply install `requirements.txt` in this directory to get the matching versions of the dependencies above. 38 | ``` 39 | pip install -r requirements.txt 40 | ``` 41 | 42 | ### Run an example 43 | 44 | Enter the code folder of an example and run 45 | ``` 46 | python train.py 47 | ``` 48 |
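Lesson 2 of the course outline above covers tabular Sarsa and Q-learning. As a reminder of what those exercises implement, here is a toy Q-learning update — an illustrative sketch, not the course code:

```python
import numpy as np

# Toy tabular Q-learning: the lesson2 material in miniature (not the course code).
n_states, n_actions = 16, 4
Q = np.zeros((n_states, n_actions))
lr, gamma = 0.1, 0.9

def q_learning_update(s, a, r, s_next, done):
    """One off-policy TD(0) update: bootstrap from the greedy next action."""
    target = r if done else r + gamma * Q[s_next].max()
    Q[s, a] += lr * (target - Q[s, a])
```

Sarsa differs only in bootstrapping from the action actually taken next rather than the greedy one.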
-------------------------------------------------------------------------------- /examples/tutorials/homework/lesson3/dqn_mountaincar/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | #-*- coding: utf-8 -*- 16 | 17 | import parl 18 | from parl import layers  # wraps the paddle.fluid.layers API 19 | 20 | 21 | class Model(parl.Model): 22 | def __init__(self, act_dim): 23 | hid1_size = 128 24 | hid2_size = 128 25 | # 3-layer fully connected network 26 | self.fc1 = layers.fc(size=hid1_size, act='relu') 27 | self.fc2 = layers.fc(size=hid2_size, act='relu') 28 | self.fc3 = layers.fc(size=act_dim, act=None) 29 | 30 | def value(self, obs): 31 | h1 = self.fc1(obs) 32 | h2 = self.fc2(h1) 33 | Q = self.fc3(h2) 34 | return Q 35 | -------------------------------------------------------------------------------- /examples/tutorials/homework/lesson4/policy_gradient_pong/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | #-*- coding: utf-8 -*- 16 | 17 | import parl 18 | from parl import layers 19 | 20 | 21 | class Model(parl.Model): 22 | def __init__(self, act_dim): 23 | act_dim = act_dim 24 | hid1_size = 256 25 | hid2_size = 64 26 | 27 | self.fc1 = layers.fc(size=hid1_size, act='relu') 28 | self.fc2 = layers.fc(size=hid2_size, act='relu') 29 | self.fc3 = layers.fc(size=act_dim, act='softmax') 30 | 31 | def forward(self, obs): 32 | h1 = self.fc1(obs) 33 | h2 = self.fc2(h1) 34 | out = self.fc3(h2) 35 | return out 36 |
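The softmax head above returns a probability distribution over actions; during rollout the agent samples from it. A minimal illustration of that consumption pattern — a sketch, not the course agents' actual `sample()` method:

```python
import numpy as np

# Consuming a softmax policy head like the model above (illustrative sketch).
probs = np.array([0.1, 0.7, 0.2])               # stand-in for Model.forward(obs)
action = np.random.choice(len(probs), p=probs)  # stochastic, for exploration
greedy_action = int(np.argmax(probs))           # deterministic, for evaluation
```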
-------------------------------------------------------------------------------- /examples/tutorials/lesson3/dqn/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | #-*- coding: utf-8 -*- 16 | 17 | import parl 18 | from parl import layers  # wraps the paddle.fluid.layers API 19 | 20 | 21 | class Model(parl.Model): 22 | def __init__(self, act_dim): 23 | hid1_size = 128 24 | hid2_size = 128 25 | # 3-layer fully connected network 26 | self.fc1 = layers.fc(size=hid1_size, act='relu') 27 | self.fc2 = layers.fc(size=hid2_size, act='relu') 28 | self.fc3 = layers.fc(size=act_dim, act=None) 29 | 30 | def value(self, obs): 31 | h1 = self.fc1(obs) 32 | h2 = self.fc2(h1) 33 | Q = self.fc3(h2) 34 | return Q 35 | -------------------------------------------------------------------------------- /examples/tutorials/lesson4/policy_gradient/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | #-*- coding: utf-8 -*- 16 | 17 | import parl 18 | from parl import layers 19 | 20 | 21 | class Model(parl.Model): 22 | def __init__(self, act_dim): 23 | act_dim = act_dim 24 | hid1_size = act_dim * 10 25 | 26 | self.fc1 = layers.fc(size=hid1_size, act='tanh') 27 | self.fc2 = layers.fc(size=act_dim, act='softmax') 28 | 29 | def forward(self, obs):  # can be called directly: model = Model(5); model(obs) 30 | out = self.fc1(obs) 31 | out = self.fc2(out) 32 | return out 33 | -------------------------------------------------------------------------------- /examples/tutorials/parl2_dygraph/README.md: -------------------------------------------------------------------------------- 1 | ## "PARL: Hands-on Introduction to Reinforcement Learning" course examples (dynamic graph version) 2 | + At the request of many students, we provide **dynamic-graph framework** versions of the course code for lesson3-lesson5; lesson1-lesson2 do not involve neural networks, so the code in the parent directory can still be used. 3 | 4 | ## Code outline 5 | + `lesson3`: solving RL with neural networks 6 | + dqn: solve the CartPole problem with the DQN algorithm. 7 | + homework: solve the MountainCar problem with the DQN algorithm. 8 | + `lesson4`: solving RL with policy gradients 9 | + policy_gradient: solve the CartPole problem with the PG algorithm. 10 | + homework: solve the Pong environment (an Atari game) with the PG algorithm. 11 | + `lesson5`: solving RL in continuous action spaces 12 | + ddpg: solve a continuous-action version of the CartPole problem with the DDPG algorithm. 13 | + homework: solve the quadrotor hovering task with the DDPG algorithm. 14 | 15 | 16 | ## Usage 17 | 18 | ### Install dependencies (note: please install the exact versions listed) 19 | 20 | + Python 3.6/3.7/3.8 21 | + [paddlepaddle](https://github.com/PaddlePaddle/Paddle)==2.2.0 22 | + [parl](https://github.com/PaddlePaddle/PARL)==2.0.3 23 | + gym==0.18.0 24 | + atari-py==0.2.6 (only needed for the lesson4 homework) 25 | + rlschool==0.3.1 (only needed for the lesson5 homework) 26 | 27 | You can simply install `requirements.txt` in this directory to get the matching versions of the dependencies above. 28 | ``` 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | ### Run an example 33 | 34 | Enter the code folder of an example and run 35 | ``` 36 | python train.py 37 | ``` 38 | -------------------------------------------------------------------------------- /examples/tutorials/parl2_dygraph/lesson4/policy_gradient/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | #-*- coding: utf-8 -*- 16 | 17 | import paddle 18 | import paddle.nn as nn 19 | import paddle.nn.functional as F 20 | import parl 21 | 22 | 23 | class Model(parl.Model): 24 | """ A fully connected network. 25 | 26 | Args: 27 | obs_dim (int): Dimension of the observation space. 28 | act_dim (int): Dimension of the action space. 29 | """ 30 | 31 | def __init__(self, obs_dim, act_dim): 32 | super(Model, self).__init__() 33 | hid1_size = act_dim * 10 34 | self.fc1 = nn.Linear(obs_dim, hid1_size) 35 | self.fc2 = nn.Linear(hid1_size, act_dim) 36 | 37 | def forward(self, x):  # can be called directly: model = Model(5); model(obs) 38 | out = paddle.tanh(self.fc1(x)) 39 | prob = F.softmax(self.fc2(out), axis=-1) 40 | return prob 41 | -------------------------------------------------------------------------------- /examples/tutorials/parl2_dygraph/requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements for tutorials (paddle dygraph version) 2 | paddlepaddle==2.2.0 3 | parl==2.0.3 4 | gym==0.18.0 5 | atari-py==0.2.6 6 | rlschool==0.3.1 7 | -------------------------------------------------------------------------------- /examples/tutorials/requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements for tutorials (paddle fluid version) 2 | paddlepaddle==1.8.5 3 | parl==1.4 4 | gym==0.18.0 5 | atari-py==0.2.6 6 | rlschool==0.3.1 7 | -------------------------------------------------------------------------------- /papers/NeurIPS 2019 RL papers.numbers: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/papers/NeurIPS 2019 RL papers.numbers -------------------------------------------------------------------------------- /parl/algorithms/fluid/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from parl.algorithms.fluid.a3c import * 16 | from parl.algorithms.fluid.ddpg import * 17 | from parl.algorithms.fluid.maddpg import * 18 | from parl.algorithms.fluid.dqn import * 19 | from parl.algorithms.fluid.ddqn import * 20 | from parl.algorithms.fluid.policy_gradient import * 21 | from parl.algorithms.fluid.ppo import * 22 | from parl.algorithms.fluid.td3 import * 23 | from parl.algorithms.fluid.sac import * 24 | from parl.algorithms.fluid.impala.impala import * 25 | from parl.algorithms.fluid.qmix import * 26 | -------------------------------------------------------------------------------- /parl/algorithms/fluid/impala/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.algorithms.fluid.impala.impala import * 16 | -------------------------------------------------------------------------------- /parl/algorithms/paddle/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.algorithms.paddle.policy_gradient import * 16 | from parl.algorithms.paddle.td3 import * 17 | from parl.algorithms.paddle.sac import * 18 | from parl.algorithms.paddle.dqn import * 19 | from parl.algorithms.paddle.ddpg import * 20 | from parl.algorithms.paddle.oac import * 21 | from parl.algorithms.paddle.qmix import * 22 | from parl.algorithms.paddle.a2c import * 23 | from parl.algorithms.paddle.ddqn import * 24 | from parl.algorithms.paddle.maddpg import * 25 | from parl.algorithms.paddle.ppo import * 26 | from parl.algorithms.paddle.cql import * 27 | from parl.algorithms.paddle.impala.impala import * 28 | -------------------------------------------------------------------------------- /parl/algorithms/paddle/impala/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.algorithms.paddle.impala.impala import * 16 | -------------------------------------------------------------------------------- /parl/algorithms/torch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.algorithms.torch.ddqn import * 16 | from parl.algorithms.torch.dqn import * 17 | from parl.algorithms.torch.a2c import * 18 | from parl.algorithms.torch.td3 import * 19 | from parl.algorithms.torch.coma import * 20 | from parl.algorithms.torch.ppo import * 21 | from parl.algorithms.torch.policy_gradient import * 22 | from parl.algorithms.torch.qmix import * 23 | from parl.algorithms.torch.ddpg import * 24 | from parl.algorithms.torch.sac import * 25 | from parl.algorithms.torch.oac import * 26 | from parl.algorithms.torch.maddpg import * 27 | from parl.algorithms.torch.cql import * 28 | from parl.algorithms.torch.mappo import * 29 | from parl.algorithms.torch.dt import * 30 | -------------------------------------------------------------------------------- /parl/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.core.agent_base import * 16 | from parl.core.model_base import * 17 | from parl.core.algorithm_base import * 18 | -------------------------------------------------------------------------------- /parl/core/fluid/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.core.fluid.model import * 16 | from parl.core.fluid.algorithm import * 17 | from parl.core.fluid.agent import * 18 | 19 | from . import layers 20 | from . import plutils 21 | from . import policy_distribution 22 | -------------------------------------------------------------------------------- /parl/core/fluid/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | This file wraps Fluid layers that have parameters to support parameter sharing. 16 | For other layers that don't have parameters, we simply copy them to this namespace. 17 | """ 18 | from paddle.fluid.layers import * 19 | from parl.core.fluid.layers.layer_wrappers import * 20 | -------------------------------------------------------------------------------- /parl/core/fluid/plutils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.core.fluid.plutils.common import * 16 | -------------------------------------------------------------------------------- /parl/core/paddle/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.core.paddle.model import * 16 | from parl.core.paddle.algorithm import * 17 | from parl.core.paddle.agent import * 18 | -------------------------------------------------------------------------------- /parl/core/tests/model_base_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | from parl.core.model_base import ModelBase 17 | 18 | 19 | class TestBaseModel(ModelBase): 20 | def forward(self, x, y): 21 | return x + y 22 | 23 | 24 | class ModelBaseTest(unittest.TestCase): 25 | def setUp(self): 26 | self.model = TestBaseModel() 27 | 28 | def test_forward(self): 29 | x, y = 10, 20 30 | expected_out = x + y 31 | forward_out = self.model(x, y) 32 | self.assertEqual(forward_out, expected_out) 33 | 34 | 35 | if __name__ == '__main__': 36 | unittest.main() 37 | -------------------------------------------------------------------------------- /parl/core/torch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.core.torch.model import * 16 | from parl.core.torch.algorithm import * 17 | from parl.core.torch.agent import * 18 | -------------------------------------------------------------------------------- /parl/env/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from parl.env.vector_env import * 16 | from parl.env.continuous_wrappers import * 17 | from parl.env.compat_wrappers import * 18 | -------------------------------------------------------------------------------- /parl/remote/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.remote.master import * 16 | from parl.remote.worker import * 17 | from parl.remote.client import * 18 | from parl.remote.exceptions import * 19 | from parl.remote.remote_decorator import * 20 | -------------------------------------------------------------------------------- /parl/remote/compatible_trick.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | This file is used to fix the problem that cloudpickle cannot load some packages normally on Mac OS. 16 | We work around the problem by trying to load these packages in the main module in advance. 17 | 18 | Template: 19 | 20 | try: 21 | import [PACKAGE1] 22 | except ImportError: 23 | pass 24 | 25 | try: 26 | import [PACKAGE2] 27 | except ImportError: 28 | pass 29 | 30 | """ 31 | from parl.utils import _IS_MAC 32 | 33 | if _IS_MAC: 34 | try: 35 | import rlschool 36 | except ImportError: 37 | pass 38 |
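The `parl.remote` package exported above provides the `@parl.remote_class` decorator and `parl.connect`, which the test files further down exercise. A minimal end-to-end sketch — this assumes an xparl master has already been started (e.g. with `xparl start --port 8010`); the class and port here are illustrative, not from the repo:

```python
import parl

@parl.remote_class
class Counter(object):
    def add(self, a, b):
        return a + b

# Assumes a running xparl master at this address; without one,
# instantiating Counter raises an error (see client_not_init_test.py below).
parl.connect('localhost:8010')
counter = Counter()        # transparently deployed to a remote worker
print(counter.add(1, 2))   # executed remotely, prints 3
```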
-------------------------------------------------------------------------------- /parl/remote/future_mode/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.remote.future_mode.future_object import * 16 | from parl.remote.future_mode.proxy_wrapper_nowait import * 17 | -------------------------------------------------------------------------------- /parl/remote/grpc_heartbeat/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from parl.remote.grpc_heartbeat.heartbeat_server import * 16 | from parl.remote.grpc_heartbeat.heartbeat_client import * 17 | -------------------------------------------------------------------------------- /parl/remote/grpc_heartbeat/heartbeat.proto: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | syntax = "proto3"; 16 | 17 | // The heartbeat service definition. 18 | service GrpcHeartbeat { 19 | rpc Send(Request) returns (Reply) {} 20 | } 21 | 22 | // The request message. 23 | message Request { 24 | string client_id = 1; 25 | bytes tag = 2; 26 | string instance_id = 4; // used in heartbeat detection between the job and client. 27 | string extra_msg = 8; 28 | } 29 | 30 | // The response message. 31 | message Reply { 32 | bytes tag = 1; 33 | } 34 | -------------------------------------------------------------------------------- /parl/remote/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/parl/remote/static/favicon.ico -------------------------------------------------------------------------------- /parl/remote/static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/parl/remote/static/logo.png -------------------------------------------------------------------------------- /parl/remote/tests/client_not_init_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import unittest 15 | import parl 16 |  17 |  18 | @parl.remote_class 19 | class Actor(object): 20 | pass 21 |  22 |  23 | class TestClient(unittest.TestCase): 24 | def test_not_init(self): 25 | """The client is expected to raise an error saying that the master has not been started.""" 26 |  27 | def create_actor(): 28 | actor = Actor() 29 |  30 | self.assertRaises(AssertionError, create_actor) 31 |  32 |  33 | if __name__ == '__main__': 34 | unittest.main() 35 | -------------------------------------------------------------------------------- /parl/remote/tests/mocking_env_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from parl.remote.test_utils import MockingEnv 15 | from parl.remote.test_utils import env_config_for_test 16 | import unittest 17 | import parl 18 |  19 |  20 | @env_config_for_test 21 | class TestMockingEnv(MockingEnv): 22 | def test_gpu_env(self): 23 | ret = parl.utils.machine_info.is_gpu_available() 24 | assert ret == self.return_value 25 |  26 |  27 | if __name__ == '__main__': 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /parl/remote/tests/rom/pong.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/parl/remote/tests/rom/pong.bin -------------------------------------------------------------------------------- /parl/remote/tests/simulate_client.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
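14 | # A sketch of running this helper by hand, assuming xparl's command-line
15 | # interface (the port must match the hard-coded 1337 below):
16 | #
17 | #     xparl start --port 1337 --cpu_num 1
18 | #     python simulate_client.py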
19 | import time 20 | import parl 21 |  22 |  23 | @parl.remote_class 24 | class Actor(object): 25 | def add_one(self, value): 26 | value += 1 27 | return value 28 |  29 |  30 | def train(): 31 | # reset_job_test.py executes simulate_client.py; the two scripts must use the same port 32 | parl.connect('localhost:1337') # cannot use get_free_tcp_port() 33 | actor = Actor() 34 | actor.add_one(1) 35 | time.sleep(100000) 36 |  37 |  38 | if __name__ == '__main__': 39 | train() 40 | -------------------------------------------------------------------------------- /parl/remote/tests/test_import_module/Module2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import parl 15 |  16 |  17 | @parl.remote_class 18 | class B(object): 19 | def add_sum(self, a, b): 20 | return a + b 21 | -------------------------------------------------------------------------------- /parl/remote/tests/test_import_module/main_abs_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import unittest 15 | import parl 16 | from parl.utils.test_utils import XparlTestCase 17 |  18 | class TestImport(XparlTestCase): 19 | def test_import_local_module(self): 20 | from Module2 import B 21 | self.add_master() 22 | self.add_worker(n_cpu=1) 23 | parl.connect("localhost:{}".format(self.port)) 24 | obj = B() 25 | res = obj.add_sum(10, 5) 26 | self.assertEqual(res, 15) 27 |  28 | if __name__ == '__main__': 29 | unittest.main(failfast=True) 30 | -------------------------------------------------------------------------------- /parl/remote/tests/test_import_module/subdir/Module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import parl 15 | 16 | 17 | @parl.remote_class 18 | class A(object): 19 | def add_sum(self, a, b): 20 | return a + b 21 | -------------------------------------------------------------------------------- /parl/remote/tests/test_import_module/subdir/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /parl/tests/import_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | 18 | class ImportTest(unittest.TestCase): 19 | def test_import_parl_alone(self): 20 | import parl 21 | 22 | 23 | if __name__ == '__main__': 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /parl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 |  15 | from parl.utils.exceptions import * 16 | from parl.utils.utils import * 17 | from parl.utils.csv_logger import * 18 | from parl.utils.machine_info import * 19 | from parl.utils.np_utils import * 20 | from parl.utils.replay_memory import * 21 | from parl.utils.rl_utils import * 22 | from parl.utils.scheduler import * 23 | from parl.utils.path_utils import * 24 | from parl.utils.globvars import * 25 | -------------------------------------------------------------------------------- /parl/utils/np_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 |  15 | import numpy as np 16 |  17 | __all__ = ['np_softmax', 'np_cross_entropy'] 18 |  19 |  20 | def np_softmax(logits): 21 | # subtract the per-row max before exponentiating; softmax is shift-invariant, 22 | # and this avoids overflow for large logits 23 | exp_logits = np.exp(logits - np.max(logits, axis=-1, keepdims=True)) 24 | return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True) 25 |  26 |  27 | def np_cross_entropy(probs, labels): 28 | if labels.shape[-1] == 1: 29 | # sparse labels: expand class indices into one-hot vectors 30 | n_classes = probs.shape[-1] 31 | result_shape = list(labels.shape[:-1]) + [n_classes] 32 | labels = np.eye(n_classes)[labels.reshape(-1)] 33 | labels = labels.reshape(result_shape) 34 |  35 | return -np.sum(labels * np.log(probs), axis=-1, keepdims=True) 36 | -------------------------------------------------------------------------------- /parl/utils/path_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 |  15 | __all__ = ['format_uniform_path'] 16 |  17 | import os 18 |  19 |  20 | def format_uniform_path(path): 21 | """Format the path into a uniform path with all separators replaced by os.sep. 22 | """ 23 | path = path.replace("//", os.sep) 24 | path = path.replace("/", os.sep) 25 | path = path.replace("\\", os.sep) 26 | return path 27 | -------------------------------------------------------------------------------- /parl/utils/summary.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 |  15 | try: 16 | from parl.utils.visualdl import * 17 | except ImportError: 18 | from parl.utils.tensorboard import * 19 | -------------------------------------------------------------------------------- /parl/utils/tests/globvar_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 |  15 | import unittest 16 | import argparse 17 | from parl.utils import logger 18 | from parl.utils import global_config as config 19 |  20 |  21 | class TestGlobalConfig(unittest.TestCase): 22 | def test_load_argument(self): 23 | # `parser` is created in the __main__ block below before unittest.main() runs 24 | config.load_argument(parser.parse_args()) 25 | self.assertEqual(config.embed_dim, 128) 26 | self.assertEqual(config.n_head, 1) 27 |  28 | if __name__ == '__main__': 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument('--embed_dim', type=int, default=128) 31 | parser.add_argument('--n_head', type=int, default=1) 32 | unittest.main() 33 | -------------------------------------------------------------------------------- /parl/utils/tests/not_import_dl_framework_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 |  15 | import unittest 16 | import sys 17 | import os 18 | import importlib 19 |  20 | class TestNotImportPaddle(unittest.TestCase): 21 | def test_import(self): 22 | # setting this environment variable keeps parl from importing the deep learning framework 23 | # (the misspelled variable name matches the spelling that parl checks internally) 24 | os.environ['XPARL_igonre_core'] = 'true' 25 | import parl 26 | self.assertFalse('paddle' in sys.modules) 27 | # remove the environment variable and reimport the library 28 | del os.environ['XPARL_igonre_core'] 29 | importlib.reload(parl) 30 | self.assertTrue('paddle' in sys.modules) 31 |  32 | if __name__ == '__main__': 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /parl/utils/tests/not_import_dl_framework_test_torch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 |  15 | import unittest 16 | import sys 17 | import os 18 | import importlib 19 |  20 | class TestNotImportTorch(unittest.TestCase): 21 | def test_import(self): 22 | # setting this environment variable keeps parl from importing the deep learning framework 23 | # (the misspelled variable name matches the spelling that parl checks internally) 24 | os.environ['XPARL_igonre_core'] = 'true' 25 | import parl 26 | self.assertFalse('torch' in sys.modules) 27 | # remove the environment variable and reimport the library 28 | del os.environ['XPARL_igonre_core'] 29 | importlib.reload(parl) 30 | self.assertTrue('torch' in sys.modules) 31 |  32 | if __name__ == '__main__': 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /test_tipc/configs/A2C/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:A2C 3 | python:python3.7 4 | gpu_list:-1 5 | null:null 6 | --max_sample_steps:lite_train_lite_infer=10 7 | null:null 8 | null:null 9 | null:null 10 | train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/A2C/train.py 15 | ## 16 | ===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/configs/CQL/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:CQL 3 | python:python3.7 4 | gpu_list:-1 5 | null:null 6 | --train_total_steps:lite_train_lite_infer=10 7 | null:null 8 | null:null 9 | null:null 10 | train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/CQL/train.py 15 | ## 16 | 
===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/configs/DDPG/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:DDPG 3 | python:python3.7 4 | gpu_list:-1 5 | null:null 6 | --train_total_steps:lite_train_lite_infer=10001 7 | null:null 8 | null:null 9 | null:null 10 | train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/DDPG/train.py 15 | ## 16 | ===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/configs/DQN/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:DQN 3 | python:python3.7 4 | gpu_list:-1 5 | null:null 6 | --max_episode:lite_train_lite_infer=3 7 | null:null 8 | null:null 9 | null:null 10 | train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/DQN/train.py 15 | ## 16 | ===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/configs/DQN_variant/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:DQN_variant 3 | python:python3.7 4 | gpu_list:-1 5 | null:null 6 | --train_total_steps:lite_train_lite_infer=1 7 | null:null 8 | null:null 9 | null:null 10 | train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/DQN_variant/train.py 15 | ## 16 | ===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/configs/ES/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:ES 3 | python:python3.7 4 | gpu_list:-1 5 | null:null 6 | --train_steps:lite_train_lite_infer=1 7 | null:null 8 | null:null 9 | null:null 10 | 
train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/ES/train.py --actor_num 2 15 | ## 16 | ===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/configs/MADDPG/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:MADDPG 3 | python:python3.7 4 | gpu_list:0 5 | null:null 6 | --max_episode:lite_train_lite_infer=3 7 | null:null 8 | null:null 9 | null:null 10 | train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/MADDPG/train.py 15 | ## 16 | ===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/configs/OAC/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:OAC 3 | python:python3.7 4 | gpu_list:-1 5 | null:null 6 | --train_total_steps:lite_train_lite_infer=10001 7 | null:null 8 | null:null 9 | null:null 10 | train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/OAC/train.py 15 | ## 16 | ===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/configs/PPO/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:PPO 3 | python:python3.7 4 | gpu_list:-1 5 | null:null 6 | --train_total_steps:lite_train_lite_infer=5000 7 | null:null 8 | null:null 9 | null:null 10 | train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/PPO/train.py 15 | ## 16 | ===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/configs/QuickStart/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:QuickStart 3 | 
python:python3.7 4 | gpu_list:0 5 | null:null 6 | --max_episode:lite_train_lite_infer=3 7 | null:null 8 | null:null 9 | null:null 10 | train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/QuickStart/train.py 15 | ## 16 | ===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/configs/SAC/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:SAC 3 | python:python3.7 4 | gpu_list:-1 5 | null:null 6 | --train_total_steps:lite_train_lite_infer=10001 7 | null:null 8 | null:null 9 | null:null 10 | train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/SAC/train.py 15 | ## 16 | ===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/configs/TD3/train_infer_python.txt: -------------------------------------------------------------------------------- 1 | ===========================train_params=========================== 2 | model_name:TD3 3 | python:python3.7 4 | gpu_list:-1 5 | null:null 6 | --train_total_steps:lite_train_lite_infer=10001 7 | null:null 8 | null:null 9 | null:null 10 | train_model_name:null 11 | null:null 12 | ## 13 | trainer:norm_train 14 | norm_train:examples/TD3/train.py 15 | ## 16 | ===========================eval_params=========================== 17 | eval:null 18 | ## 19 | ===========================infer_params=========================== 20 | null:null 21 | null:null 22 | null:null 23 | ## 24 | null:null 25 | null:null 26 | ## 27 | null:null 28 | null:null 29 | null:null 30 | null:null 31 | null:null 32 | null:null 33 | -------------------------------------------------------------------------------- /test_tipc/docs/test_tipc_log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PARL/88af2676301372f52d02a5df671847a12dcc8a2e/test_tipc/docs/test_tipc_log.png --------------------------------------------------------------------------------
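A usage sketch for the GrpcHeartbeat service defined in parl/remote/grpc_heartbeat/heartbeat.proto. This is illustrative rather than parl's actual heartbeat_server/heartbeat_client implementation; the stub modules heartbeat_pb2 and heartbeat_pb2_grpc are assumed to come from the standard protoc invocation (python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. heartbeat.proto):

from concurrent import futures

import grpc
import heartbeat_pb2
import heartbeat_pb2_grpc


class HeartbeatServicer(heartbeat_pb2_grpc.GrpcHeartbeatServicer):
    def Send(self, request, context):
        # echo the caller's tag back so it knows this peer is alive
        return heartbeat_pb2.Reply(tag=request.tag)


def serve(address='localhost:50051'):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=2))
    heartbeat_pb2_grpc.add_GrpcHeartbeatServicer_to_server(HeartbeatServicer(), server)
    server.add_insecure_port(address)
    server.start()
    return server


def ping(address='localhost:50051'):
    with grpc.insecure_channel(address) as channel:
        stub = heartbeat_pb2_grpc.GrpcHeartbeatStub(channel)
        reply = stub.Send(
            heartbeat_pb2.Request(client_id='client-0', tag=b'ping'), timeout=3)
        return reply.tag


if __name__ == '__main__':
    server = serve()
    assert ping() == b'ping'
    server.stop(grace=None)

Echoing the request's tag back in Reply mirrors the proto's design: a liveness probe only needs proof that the peer answered the same message it was sent.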