├── .gitignore ├── LICENSE ├── README.md ├── configs ├── env │ └── env_config_for_ppo.json └── train │ ├── iteration_1 │ ├── lambda_0 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-1 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-2 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-3 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-4 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ └── lambda_1e0 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── iteration_2 │ ├── lambda_0 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-1 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-2 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-3 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-4 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ └── lambda_1e0 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── iteration_3 │ ├── lambda_0 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-1 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-2 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-3 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-4 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ └── lambda_1e0 │ │ ├── ppo_bc_config_10hz_128_128_1.json │ │ ├── ppo_bc_config_10hz_128_128_2.json │ │ ├── ppo_bc_config_10hz_128_128_3.json │ │ ├── ppo_bc_config_10hz_128_128_4.json │ │ └── ppo_bc_config_10hz_128_128_5.json │ └── iteration_4 │ ├── lambda_0 │ ├── ppo_bc_config_10hz_128_128_1.json │ ├── ppo_bc_config_10hz_128_128_2.json │ ├── ppo_bc_config_10hz_128_128_3.json │ ├── ppo_bc_config_10hz_128_128_4.json │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-1 │ ├── ppo_bc_config_10hz_128_128_1.json │ ├── ppo_bc_config_10hz_128_128_2.json │ ├── ppo_bc_config_10hz_128_128_3.json │ ├── ppo_bc_config_10hz_128_128_4.json │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-2 │ ├── ppo_bc_config_10hz_128_128_1.json │ ├── ppo_bc_config_10hz_128_128_2.json │ ├── ppo_bc_config_10hz_128_128_3.json │ ├── ppo_bc_config_10hz_128_128_4.json │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-3 │ ├── ppo_bc_config_10hz_128_128_1.json │ ├── ppo_bc_config_10hz_128_128_2.json │ ├── ppo_bc_config_10hz_128_128_3.json │ ├── ppo_bc_config_10hz_128_128_4.json │ └── ppo_bc_config_10hz_128_128_5.json │ ├── lambda_1e-4 │ ├── ppo_bc_config_10hz_128_128_1.json │ ├── ppo_bc_config_10hz_128_128_2.json │ ├── ppo_bc_config_10hz_128_128_3.json │ ├── ppo_bc_config_10hz_128_128_4.json │ └── ppo_bc_config_10hz_128_128_5.json │ └── lambda_1e0 │ ├── ppo_bc_config_10hz_128_128_1.json │ ├── ppo_bc_config_10hz_128_128_2.json │ ├── ppo_bc_config_10hz_128_128_3.json │ ├── ppo_bc_config_10hz_128_128_4.json │ └── ppo_bc_config_10hz_128_128_5.json ├── demonstrations ├── rollout_trajs │ ├── __init__.py │ ├── rollout_by_pid.py │ ├── rollout_by_pid_parallel.py │ └── rollout_by_policy_and_update_demostrations.py └── utils │ ├── augment_trajs.py │ ├── load_dataset.py │ ├── rename_files.py │ └── smoothness │ ├── __init__.py │ ├── fourier.py │ ├── smoothness_measure.py │ ├── test_fft.ipynb │ ├── test_fft2.ipynb │ └── test_fourier.ipynb ├── exp_on_d4rl ├── README.md ├── configs │ ├── iter_1 │ │ ├── seed1 │ │ │ ├── kl_0 │ │ │ │ ├── medium_halfcheetah_256_256_kl0.json │ │ │ │ └── medium_hopper_256_256_kl0.json │ │ │ ├── kl_1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1.json │ │ │ │ └── medium_hopper_256_256_kl1.json │ │ │ ├── kl_1e-1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1.json │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1_annealing.json │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1_ema.json │ │ │ │ ├── medium_hopper_256_256_kl1e-1.json │ │ │ │ ├── medium_hopper_256_256_kl1e-1_annealing.json │ │ │ │ └── medium_hopper_256_256_kl1e-1_ema.json │ │ │ ├── kl_1e-2 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-2.json │ │ │ │ └── medium_hopper_256_256_kl1e-2.json │ │ │ ├── kl_1e-3 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-3.json │ │ │ │ └── medium_hopper_256_256_kl1e-3.json │ │ │ ├── medium_halfcheetah_256_256.json │ │ │ ├── medium_halfcheetah_256_256_64envs.json │ │ │ └── medium_hopper_256_256.json │ │ ├── seed2 │ │ │ ├── kl_0 │ │ │ │ ├── medium_halfcheetah_256_256_kl0.json │ │ │ │ └── medium_hopper_256_256_kl0.json │ │ │ ├── kl_1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1.json │ │ │ │ └── medium_hopper_256_256_kl1.json │ │ │ ├── kl_1e-1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1.json │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1_annealing.json │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1_ema.json │ │ │ │ ├── medium_hopper_256_256_kl1e-1.json │ │ │ │ ├── medium_hopper_256_256_kl1e-1_annealing.json │ │ │ │ └── medium_hopper_256_256_kl1e-1_ema.json │ │ │ ├── kl_1e-2 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-2.json │ │ │ │ └── medium_hopper_256_256_kl1e-2.json │ │ │ ├── kl_1e-3 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-3.json │ │ │ │ └── medium_hopper_256_256_kl1e-3.json │ │ │ ├── medium_halfcheetah_256_256.json │ │ │ └── medium_hopper_256_256.json │ │ ├── seed3 │ │ │ ├── kl_0 │ │ │ │ ├── medium_halfcheetah_256_256_kl0.json │ │ │ │ └── medium_hopper_256_256_kl0.json │ │ │ ├── kl_1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1.json │ │ │ │ └── medium_hopper_256_256_kl1.json │ │ │ ├── kl_1e-1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1.json │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1_annealing.json │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1_ema.json │ │ │ │ ├── medium_hopper_256_256_kl1e-1.json │ │ │ │ ├── medium_hopper_256_256_kl1e-1_annealing.json │ │ │ │ └── medium_hopper_256_256_kl1e-1_ema.json │ │ │ ├── kl_1e-2 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-2.json │ │ │ │ └── medium_hopper_256_256_kl1e-2.json │ │ │ ├── kl_1e-3 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-3.json │ │ │ │ └── medium_hopper_256_256_kl1e-3.json │ │ │ ├── medium_halfcheetah_256_256.json │ │ │ └── medium_hopper_256_256.json │ │ ├── seed4 │ │ │ ├── kl_0 │ │ │ │ ├── medium_halfcheetah_256_256_kl0.json │ │ │ │ └── medium_hopper_256_256_kl0.json │ │ │ ├── kl_1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1.json │ │ │ │ └── medium_hopper_256_256_kl1.json │ │ │ ├── kl_1e-1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1.json │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1_annealing.json │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1_ema.json │ │ │ │ ├── medium_hopper_256_256_kl1e-1.json │ │ │ │ ├── medium_hopper_256_256_kl1e-1_annealing.json │ │ │ │ └── medium_hopper_256_256_kl1e-1_ema.json │ │ │ ├── kl_1e-2 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-2.json │ │ │ │ └── medium_hopper_256_256_kl1e-2.json │ │ │ ├── kl_1e-3 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-3.json │ │ │ │ └── medium_hopper_256_256_kl1e-3.json │ │ │ ├── medium_halfcheetah_256_256.json │ │ │ └── medium_hopper_256_256.json │ │ └── seed5 │ │ │ ├── kl_0 │ │ │ ├── medium_halfcheetah_256_256_kl0.json │ │ │ └── medium_hopper_256_256_kl0.json │ │ │ ├── kl_1 │ │ │ ├── medium_halfcheetah_256_256_kl1.json │ │ │ └── medium_hopper_256_256_kl1.json │ │ │ ├── kl_1e-1 │ │ │ ├── medium_halfcheetah_256_256_kl1e-1.json │ │ │ ├── medium_halfcheetah_256_256_kl1e-1_annealing.json │ │ │ ├── medium_halfcheetah_256_256_kl1e-1_ema.json │ │ │ ├── medium_hopper_256_256_kl1e-1.json │ │ │ ├── medium_hopper_256_256_kl1e-1_annealing.json │ │ │ └── medium_hopper_256_256_kl1e-1_ema.json │ │ │ ├── kl_1e-2 │ │ │ ├── medium_halfcheetah_256_256_kl1e-2.json │ │ │ └── medium_hopper_256_256_kl1e-2.json │ │ │ ├── kl_1e-3 │ │ │ ├── medium_halfcheetah_256_256_kl1e-3.json │ │ │ └── medium_hopper_256_256_kl1e-3.json │ │ │ ├── medium_halfcheetah_256_256.json │ │ │ └── medium_hopper_256_256.json │ ├── iter_2 │ │ ├── seed1 │ │ │ ├── kl_0 │ │ │ │ ├── medium_halfcheetah_256_256_kl0.json │ │ │ │ └── medium_hopper_256_256_kl0.json │ │ │ ├── kl_1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1.json │ │ │ │ └── medium_hopper_256_256_kl1.json │ │ │ ├── kl_1e-1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1.json │ │ │ │ └── medium_hopper_256_256_kl1e-1.json │ │ │ ├── kl_1e-2 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-2.json │ │ │ │ └── medium_hopper_256_256_kl1e-2.json │ │ │ └── kl_1e-3 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-3.json │ │ │ │ └── medium_hopper_256_256_kl1e-3.json │ │ ├── seed2 │ │ │ ├── kl_0 │ │ │ │ ├── medium_halfcheetah_256_256_kl0.json │ │ │ │ └── medium_hopper_256_256_kl0.json │ │ │ ├── kl_1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1.json │ │ │ │ └── medium_hopper_256_256_kl1.json │ │ │ ├── kl_1e-1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1.json │ │ │ │ └── medium_hopper_256_256_kl1e-1.json │ │ │ ├── kl_1e-2 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-2.json │ │ │ │ └── medium_hopper_256_256_kl1e-2.json │ │ │ └── kl_1e-3 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-3.json │ │ │ │ └── medium_hopper_256_256_kl1e-3.json │ │ ├── seed3 │ │ │ ├── kl_0 │ │ │ │ ├── medium_halfcheetah_256_256_kl0.json │ │ │ │ └── medium_hopper_256_256_kl0.json │ │ │ ├── kl_1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1.json │ │ │ │ └── medium_hopper_256_256_kl1.json │ │ │ ├── kl_1e-1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1.json │ │ │ │ └── medium_hopper_256_256_kl1e-1.json │ │ │ ├── kl_1e-2 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-2.json │ │ │ │ └── medium_hopper_256_256_kl1e-2.json │ │ │ └── kl_1e-3 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-3.json │ │ │ │ └── medium_hopper_256_256_kl1e-3.json │ │ ├── seed4 │ │ │ ├── kl_0 │ │ │ │ ├── medium_halfcheetah_256_256_kl0.json │ │ │ │ └── medium_hopper_256_256_kl0.json │ │ │ ├── kl_1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1.json │ │ │ │ └── medium_hopper_256_256_kl1.json │ │ │ ├── kl_1e-1 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-1.json │ │ │ │ └── medium_hopper_256_256_kl1e-1.json │ │ │ ├── kl_1e-2 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-2.json │ │ │ │ └── medium_hopper_256_256_kl1e-2.json │ │ │ └── kl_1e-3 │ │ │ │ ├── medium_halfcheetah_256_256_kl1e-3.json │ │ │ │ └── medium_hopper_256_256_kl1e-3.json │ │ └── seed5 │ │ │ ├── kl_0 │ │ │ ├── medium_halfcheetah_256_256_kl0.json │ │ │ └── medium_hopper_256_256_kl0.json │ │ │ ├── kl_1 │ │ │ ├── medium_halfcheetah_256_256_kl1.json │ │ │ └── medium_hopper_256_256_kl1.json │ │ │ ├── kl_1e-1 │ │ │ ├── medium_halfcheetah_256_256_kl1e-1.json │ │ │ └── medium_hopper_256_256_kl1e-1.json │ │ │ ├── kl_1e-2 │ │ │ ├── medium_halfcheetah_256_256_kl1e-2.json │ │ │ └── medium_hopper_256_256_kl1e-2.json │ │ │ └── kl_1e-3 │ │ │ ├── medium_halfcheetah_256_256_kl1e-3.json │ │ │ └── medium_hopper_256_256_kl1e-3.json │ └── load_config.py ├── evaluate │ └── evaluate_kl.ipynb ├── models │ ├── ray_mlp_model.py │ ├── sb3_cnn_model.py │ └── sb3_model.py ├── rollout │ ├── README.md │ ├── load_data.py │ ├── rollout.ipynb │ ├── rollout.py │ ├── rollout.sh │ └── rollout_by_multi_policies.ipynb ├── sb3_bc_train.py ├── sb3_gail_train.py ├── sb3_rl_train.py ├── sb3_rl_train_after_bc.py ├── sb3_rl_train_after_bc_ema.py ├── scripts │ ├── train_all │ │ └── iter_1 │ │ │ └── medium_halfcheetah_kl_1e-1.sh │ ├── train_bc │ │ └── medium │ │ │ ├── iter_1 │ │ │ ├── halfcheetah.sh │ │ │ └── hopper.sh │ │ │ └── iter_2 │ │ │ ├── halfcheetah.sh │ │ │ └── hopper.sh │ ├── train_rl │ │ ├── halfcheetah.sh │ │ └── hopper.sh │ └── train_rl_after_bc │ │ └── medium │ │ ├── iter_1 │ │ ├── halfcheetah.sh │ │ ├── halfcheetah_annealing.sh │ │ └── hopper.sh │ │ └── iter_2 │ │ ├── halfcheetah.sh │ │ ├── halfcheetah_kl_1e-1.sh │ │ ├── hopper.sh │ │ └── hopper_kl_1e-1.sh └── utils │ ├── load_data.py │ ├── load_data_with_dones_and_next_obs.py │ ├── sb3_callbacks.py │ ├── sb3_env_utils.py │ ├── sb3_env_wrappers.py │ ├── sb3_eval_callback.py │ ├── sb3_evaluate_kl.py │ ├── sb3_evaluate_policy.py │ └── sb3_schedule.py ├── exp_on_panda ├── README.md ├── configs │ ├── iter_1 │ │ ├── seed1 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_0_64envs.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-1_annealing.json │ │ │ ├── reacher_256_256_kl_1e-1_ema.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ ├── reacher_256_256_kl_1e0.json │ │ │ ├── reacher_mr_sac_256_256.json │ │ │ ├── reacher_nmr_jump_2_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_jump_2_sac_256_256.json │ │ │ ├── reacher_nmr_jump_3_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_jump_3_sac_256_256.json │ │ │ ├── reacher_nmr_waypoint_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_waypoint_sac_256_256.json │ │ │ └── reacher_nmr_waypoint_sac_her_256_256.json │ │ ├── seed2 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-1_annealing.json │ │ │ ├── reacher_256_256_kl_1e-1_ema.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ ├── reacher_256_256_kl_1e0.json │ │ │ ├── reacher_mr_sac_256_256.json │ │ │ ├── reacher_nmr_jump_2_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_jump_2_sac_256_256.json │ │ │ ├── reacher_nmr_jump_3_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_jump_3_sac_256_256.json │ │ │ ├── reacher_nmr_waypoint_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_waypoint_sac_256_256.json │ │ │ └── reacher_nmr_waypoint_sac_her_256_256.json │ │ ├── seed3 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-1_annealing.json │ │ │ ├── reacher_256_256_kl_1e-1_ema.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ ├── reacher_256_256_kl_1e0.json │ │ │ ├── reacher_mr_sac_256_256.json │ │ │ ├── reacher_nmr_jump_2_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_jump_2_sac_256_256.json │ │ │ ├── reacher_nmr_jump_3_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_jump_3_sac_256_256.json │ │ │ ├── reacher_nmr_waypoint_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_waypoint_sac_256_256.json │ │ │ └── reacher_nmr_waypoint_sac_her_256_256.json │ │ ├── seed4 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-1_annealing.json │ │ │ ├── reacher_256_256_kl_1e-1_ema.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ ├── reacher_256_256_kl_1e0.json │ │ │ ├── reacher_mr_sac_256_256.json │ │ │ ├── reacher_nmr_jump_2_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_jump_2_sac_256_256.json │ │ │ ├── reacher_nmr_jump_3_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_jump_3_sac_256_256.json │ │ │ ├── reacher_nmr_waypoint_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_waypoint_sac_256_256.json │ │ │ └── reacher_nmr_waypoint_sac_her_256_256.json │ │ └── seed5 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-1_annealing.json │ │ │ ├── reacher_256_256_kl_1e-1_ema.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ ├── reacher_256_256_kl_1e0.json │ │ │ ├── reacher_mr_sac_256_256.json │ │ │ ├── reacher_nmr_jump_2_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_jump_2_sac_256_256.json │ │ │ ├── reacher_nmr_jump_3_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_jump_3_sac_256_256.json │ │ │ ├── reacher_nmr_waypoint_256_256_kl_1e-2.json │ │ │ ├── reacher_nmr_waypoint_sac_256_256.json │ │ │ └── reacher_nmr_waypoint_sac_her_256_256.json │ ├── iter_2 │ │ ├── seed1 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ └── reacher_256_256_kl_1e0.json │ │ ├── seed2 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ └── reacher_256_256_kl_1e0.json │ │ ├── seed3 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ └── reacher_256_256_kl_1e0.json │ │ ├── seed4 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ └── reacher_256_256_kl_1e0.json │ │ └── seed5 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ └── reacher_256_256_kl_1e0.json │ ├── iter_3 │ │ ├── seed1 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ └── reacher_256_256_kl_1e0.json │ │ ├── seed2 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ └── reacher_256_256_kl_1e0.json │ │ ├── seed3 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ └── reacher_256_256_kl_1e0.json │ │ ├── seed4 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ └── reacher_256_256_kl_1e0.json │ │ └── seed5 │ │ │ ├── reacher_256_256_kl_0.json │ │ │ ├── reacher_256_256_kl_1e-1.json │ │ │ ├── reacher_256_256_kl_1e-1_2e7steps.json │ │ │ ├── reacher_256_256_kl_1e-2.json │ │ │ ├── reacher_256_256_kl_1e-3.json │ │ │ └── reacher_256_256_kl_1e0.json │ └── load_config.py ├── evaluate.py ├── models │ └── sb3_model.py ├── my_reach_env.py ├── rollout │ ├── discoutinuity │ │ └── rollout_my_reach_by_rl_bc.ipynb │ ├── rollout_by_pid.py │ ├── rollout_by_policy.py │ ├── rollout_my_reach_by_bc.ipynb │ ├── rollout_my_reach_by_pid.ipynb │ ├── rollout_my_reach_by_rl_bc.ipynb │ ├── rollout_my_reach_waypoint_by_pid.ipynb │ ├── rollout_reach_by_pid.ipynb │ └── utils │ │ └── calc_statistics.py ├── sb3_bc_train.py ├── sb3_rl_train.py ├── sb3_rl_train_after_bc.py ├── sb3_rl_train_after_bc_with_ema.py ├── sb3_rl_train_after_rl.py ├── scripts │ ├── train_bc │ │ ├── iter_1.sh │ │ ├── iter_2.sh │ │ └── iter_3.sh │ ├── train_rl │ │ └── iter_1.sh │ ├── train_rl_after_bc │ │ ├── iter_1_kl_0.sh │ │ ├── iter_1_kl_1e-1.sh │ │ ├── iter_1_kl_1e-1_annealing.sh │ │ ├── iter_1_kl_1e-1_ema.sh │ │ ├── iter_1_kl_1e-2.sh │ │ ├── iter_1_kl_1e-3.sh │ │ ├── iter_1_kl_1e0.sh │ │ ├── iter_2_kl_0.sh │ │ ├── iter_2_kl_1e-1.sh │ │ ├── iter_2_kl_1e-2.sh │ │ ├── iter_2_kl_1e-3.sh │ │ ├── iter_2_kl_1e0.sh │ │ ├── iter_3_kl_0.sh │ │ └── iter_3_kl_1e-1.sh │ └── train_rl_after_rl │ │ └── iter2.sh ├── train_with_rl_sac_her.py └── utils │ ├── load_data.ipynb │ ├── load_data.py │ ├── load_data_with_dones_and_next_obs.py │ ├── register_env.py │ ├── sb3_callbacks.py │ ├── sb3_env_utils.py │ ├── sb3_env_wrappers.py │ ├── sb3_eval_callback.py │ ├── sb3_evaluate_kl.py │ ├── sb3_evaluate_policy.py │ ├── sb3_schedule.py │ └── test │ └── test_nmr_waypoint_wrapper.py ├── requirements.txt ├── train_scripts ├── test │ ├── test_bc_with_dict_obs.py │ └── test_policy.py ├── train_with_bc_ppo.py ├── train_with_rl_bc_ppo.py └── train_with_rl_ppo.py └── utils_my ├── __init__.py ├── models └── ppo_with_bc_loss.py ├── sb3 ├── __init__.py ├── my_eval_callback.py ├── my_evaluate_policy.py ├── my_schedule.py ├── my_vec_frame_stack_env.py ├── my_wrappers.py ├── test.ipynb └── vec_env_helper.py └── scalar.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/README.md -------------------------------------------------------------------------------- /configs/env/env_config_for_ppo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/env/env_config_for_ppo.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_0/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_0/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_0/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_0/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_0/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_0/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_0/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_0/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_0/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_0/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-1/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-1/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-1/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-1/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-1/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-1/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-1/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-1/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-1/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-1/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-2/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-2/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-2/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-2/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-2/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-2/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-2/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-2/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-2/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-2/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-3/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-3/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-3/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-3/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-3/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-3/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-3/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-3/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-3/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-3/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-4/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-4/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-4/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-4/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-4/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-4/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-4/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-4/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e-4/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e-4/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e0/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e0/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e0/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e0/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e0/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e0/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e0/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e0/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_1/lambda_1e0/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_1/lambda_1e0/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_0/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_0/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_0/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_0/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_0/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_0/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_0/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_0/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_0/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_0/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-1/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-1/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-1/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-1/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-1/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-1/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-1/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-1/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-1/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-1/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-2/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-2/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-2/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-2/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-2/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-2/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-2/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-2/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-2/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-2/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-3/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-3/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-3/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-3/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-3/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-3/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-3/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-3/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-3/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-3/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-4/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-4/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-4/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-4/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-4/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-4/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-4/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-4/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e-4/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e-4/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e0/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e0/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e0/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e0/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e0/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e0/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e0/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e0/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_2/lambda_1e0/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_2/lambda_1e0/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_0/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_0/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_0/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_0/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_0/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_0/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_0/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_0/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_0/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_0/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-1/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-1/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-1/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-1/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-1/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-1/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-1/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-1/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-1/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-1/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-2/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-2/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-2/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-2/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-2/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-2/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-2/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-2/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-2/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-2/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-3/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-3/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-3/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-3/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-3/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-3/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-3/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-3/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-3/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-3/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-4/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-4/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-4/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-4/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-4/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-4/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-4/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-4/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e-4/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e-4/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e0/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e0/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e0/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e0/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e0/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e0/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e0/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e0/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_3/lambda_1e0/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_3/lambda_1e0/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_0/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_0/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_0/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_0/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_0/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_0/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_0/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_0/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_0/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_0/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-1/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-1/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-1/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-1/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-1/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-1/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-1/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-1/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-1/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-1/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-2/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-2/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-2/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-2/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-2/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-2/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-2/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-2/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-2/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-2/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-3/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-3/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-3/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-3/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-3/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-3/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-3/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-3/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-3/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-3/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-4/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-4/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-4/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-4/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-4/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-4/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-4/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-4/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e-4/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e-4/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e0/ppo_bc_config_10hz_128_128_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e0/ppo_bc_config_10hz_128_128_1.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e0/ppo_bc_config_10hz_128_128_2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e0/ppo_bc_config_10hz_128_128_2.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e0/ppo_bc_config_10hz_128_128_3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e0/ppo_bc_config_10hz_128_128_3.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e0/ppo_bc_config_10hz_128_128_4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e0/ppo_bc_config_10hz_128_128_4.json -------------------------------------------------------------------------------- /configs/train/iteration_4/lambda_1e0/ppo_bc_config_10hz_128_128_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/configs/train/iteration_4/lambda_1e0/ppo_bc_config_10hz_128_128_5.json -------------------------------------------------------------------------------- /demonstrations/rollout_trajs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demonstrations/rollout_trajs/rollout_by_pid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/demonstrations/rollout_trajs/rollout_by_pid.py -------------------------------------------------------------------------------- /demonstrations/rollout_trajs/rollout_by_pid_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/demonstrations/rollout_trajs/rollout_by_pid_parallel.py -------------------------------------------------------------------------------- /demonstrations/rollout_trajs/rollout_by_policy_and_update_demostrations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/demonstrations/rollout_trajs/rollout_by_policy_and_update_demostrations.py -------------------------------------------------------------------------------- /demonstrations/utils/augment_trajs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/demonstrations/utils/augment_trajs.py -------------------------------------------------------------------------------- /demonstrations/utils/load_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/demonstrations/utils/load_dataset.py -------------------------------------------------------------------------------- /demonstrations/utils/rename_files.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/demonstrations/utils/rename_files.py -------------------------------------------------------------------------------- /demonstrations/utils/smoothness/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demonstrations/utils/smoothness/fourier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/demonstrations/utils/smoothness/fourier.py -------------------------------------------------------------------------------- /demonstrations/utils/smoothness/smoothness_measure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/demonstrations/utils/smoothness/smoothness_measure.py -------------------------------------------------------------------------------- /demonstrations/utils/smoothness/test_fft.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/demonstrations/utils/smoothness/test_fft.ipynb -------------------------------------------------------------------------------- /demonstrations/utils/smoothness/test_fft2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/demonstrations/utils/smoothness/test_fft2.ipynb -------------------------------------------------------------------------------- /demonstrations/utils/smoothness/test_fourier.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/demonstrations/utils/smoothness/test_fourier.ipynb -------------------------------------------------------------------------------- /exp_on_d4rl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/README.md -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_0/medium_halfcheetah_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_0/medium_halfcheetah_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_0/medium_hopper_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_0/medium_hopper_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1/medium_halfcheetah_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1/medium_halfcheetah_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1/medium_hopper_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1/medium_hopper_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_hopper_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_hopper_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_hopper_256_256_kl1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_hopper_256_256_kl1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_hopper_256_256_kl1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1e-1/medium_hopper_256_256_kl1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1e-2/medium_hopper_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1e-2/medium_hopper_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/kl_1e-3/medium_hopper_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/kl_1e-3/medium_hopper_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/medium_halfcheetah_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/medium_halfcheetah_256_256.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/medium_halfcheetah_256_256_64envs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/medium_halfcheetah_256_256_64envs.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed1/medium_hopper_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed1/medium_hopper_256_256.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_0/medium_halfcheetah_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_0/medium_halfcheetah_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_0/medium_hopper_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_0/medium_hopper_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1/medium_halfcheetah_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1/medium_halfcheetah_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1/medium_hopper_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1/medium_hopper_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_hopper_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_hopper_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_hopper_256_256_kl1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_hopper_256_256_kl1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_hopper_256_256_kl1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1e-1/medium_hopper_256_256_kl1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1e-2/medium_hopper_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1e-2/medium_hopper_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/kl_1e-3/medium_hopper_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/kl_1e-3/medium_hopper_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/medium_halfcheetah_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/medium_halfcheetah_256_256.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed2/medium_hopper_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed2/medium_hopper_256_256.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_0/medium_halfcheetah_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_0/medium_halfcheetah_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_0/medium_hopper_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_0/medium_hopper_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1/medium_halfcheetah_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1/medium_halfcheetah_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1/medium_hopper_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1/medium_hopper_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_hopper_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_hopper_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_hopper_256_256_kl1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_hopper_256_256_kl1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_hopper_256_256_kl1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1e-1/medium_hopper_256_256_kl1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1e-2/medium_hopper_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1e-2/medium_hopper_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/kl_1e-3/medium_hopper_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/kl_1e-3/medium_hopper_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/medium_halfcheetah_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/medium_halfcheetah_256_256.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed3/medium_hopper_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed3/medium_hopper_256_256.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_0/medium_halfcheetah_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_0/medium_halfcheetah_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_0/medium_hopper_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_0/medium_hopper_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1/medium_halfcheetah_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1/medium_halfcheetah_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1/medium_hopper_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1/medium_hopper_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_hopper_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_hopper_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_hopper_256_256_kl1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_hopper_256_256_kl1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_hopper_256_256_kl1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1e-1/medium_hopper_256_256_kl1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1e-2/medium_hopper_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1e-2/medium_hopper_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/kl_1e-3/medium_hopper_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/kl_1e-3/medium_hopper_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/medium_halfcheetah_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/medium_halfcheetah_256_256.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed4/medium_hopper_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed4/medium_hopper_256_256.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_0/medium_halfcheetah_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_0/medium_halfcheetah_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_0/medium_hopper_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_0/medium_hopper_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1/medium_halfcheetah_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1/medium_halfcheetah_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1/medium_hopper_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1/medium_hopper_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_halfcheetah_256_256_kl1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_hopper_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_hopper_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_hopper_256_256_kl1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_hopper_256_256_kl1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_hopper_256_256_kl1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1e-1/medium_hopper_256_256_kl1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1e-2/medium_hopper_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1e-2/medium_hopper_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/kl_1e-3/medium_hopper_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/kl_1e-3/medium_hopper_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/medium_halfcheetah_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/medium_halfcheetah_256_256.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_1/seed5/medium_hopper_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_1/seed5/medium_hopper_256_256.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed1/kl_0/medium_halfcheetah_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed1/kl_0/medium_halfcheetah_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed1/kl_0/medium_hopper_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed1/kl_0/medium_hopper_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed1/kl_1/medium_halfcheetah_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed1/kl_1/medium_halfcheetah_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed1/kl_1/medium_hopper_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed1/kl_1/medium_hopper_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed1/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed1/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed1/kl_1e-1/medium_hopper_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed1/kl_1e-1/medium_hopper_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed1/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed1/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed1/kl_1e-2/medium_hopper_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed1/kl_1e-2/medium_hopper_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed1/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed1/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed1/kl_1e-3/medium_hopper_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed1/kl_1e-3/medium_hopper_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed2/kl_0/medium_halfcheetah_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed2/kl_0/medium_halfcheetah_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed2/kl_0/medium_hopper_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed2/kl_0/medium_hopper_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed2/kl_1/medium_halfcheetah_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed2/kl_1/medium_halfcheetah_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed2/kl_1/medium_hopper_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed2/kl_1/medium_hopper_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed2/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed2/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed2/kl_1e-1/medium_hopper_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed2/kl_1e-1/medium_hopper_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed2/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed2/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed2/kl_1e-2/medium_hopper_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed2/kl_1e-2/medium_hopper_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed2/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed2/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed2/kl_1e-3/medium_hopper_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed2/kl_1e-3/medium_hopper_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed3/kl_0/medium_halfcheetah_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed3/kl_0/medium_halfcheetah_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed3/kl_0/medium_hopper_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed3/kl_0/medium_hopper_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed3/kl_1/medium_halfcheetah_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed3/kl_1/medium_halfcheetah_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed3/kl_1/medium_hopper_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed3/kl_1/medium_hopper_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed3/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed3/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed3/kl_1e-1/medium_hopper_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed3/kl_1e-1/medium_hopper_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed3/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed3/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed3/kl_1e-2/medium_hopper_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed3/kl_1e-2/medium_hopper_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed3/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed3/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed3/kl_1e-3/medium_hopper_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed3/kl_1e-3/medium_hopper_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed4/kl_0/medium_halfcheetah_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed4/kl_0/medium_halfcheetah_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed4/kl_0/medium_hopper_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed4/kl_0/medium_hopper_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed4/kl_1/medium_halfcheetah_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed4/kl_1/medium_halfcheetah_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed4/kl_1/medium_hopper_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed4/kl_1/medium_hopper_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed4/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed4/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed4/kl_1e-1/medium_hopper_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed4/kl_1e-1/medium_hopper_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed4/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed4/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed4/kl_1e-2/medium_hopper_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed4/kl_1e-2/medium_hopper_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed4/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed4/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed4/kl_1e-3/medium_hopper_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed4/kl_1e-3/medium_hopper_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed5/kl_0/medium_halfcheetah_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed5/kl_0/medium_halfcheetah_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed5/kl_0/medium_hopper_256_256_kl0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed5/kl_0/medium_hopper_256_256_kl0.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed5/kl_1/medium_halfcheetah_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed5/kl_1/medium_halfcheetah_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed5/kl_1/medium_hopper_256_256_kl1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed5/kl_1/medium_hopper_256_256_kl1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed5/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed5/kl_1e-1/medium_halfcheetah_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed5/kl_1e-1/medium_hopper_256_256_kl1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed5/kl_1e-1/medium_hopper_256_256_kl1e-1.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed5/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed5/kl_1e-2/medium_halfcheetah_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed5/kl_1e-2/medium_hopper_256_256_kl1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed5/kl_1e-2/medium_hopper_256_256_kl1e-2.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed5/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed5/kl_1e-3/medium_halfcheetah_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/iter_2/seed5/kl_1e-3/medium_hopper_256_256_kl1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/iter_2/seed5/kl_1e-3/medium_hopper_256_256_kl1e-3.json -------------------------------------------------------------------------------- /exp_on_d4rl/configs/load_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/configs/load_config.py -------------------------------------------------------------------------------- /exp_on_d4rl/evaluate/evaluate_kl.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/evaluate/evaluate_kl.ipynb -------------------------------------------------------------------------------- /exp_on_d4rl/models/ray_mlp_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/models/ray_mlp_model.py -------------------------------------------------------------------------------- /exp_on_d4rl/models/sb3_cnn_model.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exp_on_d4rl/models/sb3_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/models/sb3_model.py -------------------------------------------------------------------------------- /exp_on_d4rl/rollout/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## 数据采样方法 3 | 4 | 1. -------------------------------------------------------------------------------- /exp_on_d4rl/rollout/load_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/rollout/load_data.py -------------------------------------------------------------------------------- /exp_on_d4rl/rollout/rollout.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/rollout/rollout.ipynb -------------------------------------------------------------------------------- /exp_on_d4rl/rollout/rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/rollout/rollout.py -------------------------------------------------------------------------------- /exp_on_d4rl/rollout/rollout.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/rollout/rollout.sh -------------------------------------------------------------------------------- /exp_on_d4rl/rollout/rollout_by_multi_policies.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/rollout/rollout_by_multi_policies.ipynb -------------------------------------------------------------------------------- /exp_on_d4rl/sb3_bc_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/sb3_bc_train.py -------------------------------------------------------------------------------- /exp_on_d4rl/sb3_gail_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/sb3_gail_train.py -------------------------------------------------------------------------------- /exp_on_d4rl/sb3_rl_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/sb3_rl_train.py -------------------------------------------------------------------------------- /exp_on_d4rl/sb3_rl_train_after_bc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/sb3_rl_train_after_bc.py -------------------------------------------------------------------------------- /exp_on_d4rl/sb3_rl_train_after_bc_ema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/sb3_rl_train_after_bc_ema.py -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_all/iter_1/medium_halfcheetah_kl_1e-1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_all/iter_1/medium_halfcheetah_kl_1e-1.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_bc/medium/iter_1/halfcheetah.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_bc/medium/iter_1/halfcheetah.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_bc/medium/iter_1/hopper.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_bc/medium/iter_1/hopper.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_bc/medium/iter_2/halfcheetah.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_bc/medium/iter_2/halfcheetah.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_bc/medium/iter_2/hopper.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_bc/medium/iter_2/hopper.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_rl/halfcheetah.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_rl/halfcheetah.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_rl/hopper.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_rl/hopper.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_1/halfcheetah.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_1/halfcheetah.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_1/halfcheetah_annealing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_1/halfcheetah_annealing.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_1/hopper.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_1/hopper.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_2/halfcheetah.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_2/halfcheetah.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_2/halfcheetah_kl_1e-1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_2/halfcheetah_kl_1e-1.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_2/hopper.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_2/hopper.sh -------------------------------------------------------------------------------- /exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_2/hopper_kl_1e-1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/scripts/train_rl_after_bc/medium/iter_2/hopper_kl_1e-1.sh -------------------------------------------------------------------------------- /exp_on_d4rl/utils/load_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/utils/load_data.py -------------------------------------------------------------------------------- /exp_on_d4rl/utils/load_data_with_dones_and_next_obs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/utils/load_data_with_dones_and_next_obs.py -------------------------------------------------------------------------------- /exp_on_d4rl/utils/sb3_callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/utils/sb3_callbacks.py -------------------------------------------------------------------------------- /exp_on_d4rl/utils/sb3_env_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/utils/sb3_env_utils.py -------------------------------------------------------------------------------- /exp_on_d4rl/utils/sb3_env_wrappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/utils/sb3_env_wrappers.py -------------------------------------------------------------------------------- /exp_on_d4rl/utils/sb3_eval_callback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/utils/sb3_eval_callback.py -------------------------------------------------------------------------------- /exp_on_d4rl/utils/sb3_evaluate_kl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/utils/sb3_evaluate_kl.py -------------------------------------------------------------------------------- /exp_on_d4rl/utils/sb3_evaluate_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/utils/sb3_evaluate_policy.py -------------------------------------------------------------------------------- /exp_on_d4rl/utils/sb3_schedule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_d4rl/utils/sb3_schedule.py -------------------------------------------------------------------------------- /exp_on_panda/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/README.md -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_0_64envs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_0_64envs.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_mr_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_mr_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_nmr_jump_2_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_nmr_jump_2_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_nmr_jump_2_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_nmr_jump_2_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_nmr_jump_3_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_nmr_jump_3_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_nmr_jump_3_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_nmr_jump_3_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_nmr_waypoint_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_nmr_waypoint_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_nmr_waypoint_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_nmr_waypoint_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed1/reacher_nmr_waypoint_sac_her_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed1/reacher_nmr_waypoint_sac_her_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_mr_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_mr_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_nmr_jump_2_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_nmr_jump_2_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_nmr_jump_2_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_nmr_jump_2_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_nmr_jump_3_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_nmr_jump_3_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_nmr_jump_3_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_nmr_jump_3_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_nmr_waypoint_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_nmr_waypoint_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_nmr_waypoint_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_nmr_waypoint_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed2/reacher_nmr_waypoint_sac_her_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed2/reacher_nmr_waypoint_sac_her_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_mr_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_mr_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_nmr_jump_2_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_nmr_jump_2_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_nmr_jump_2_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_nmr_jump_2_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_nmr_jump_3_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_nmr_jump_3_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_nmr_jump_3_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_nmr_jump_3_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_nmr_waypoint_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_nmr_waypoint_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_nmr_waypoint_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_nmr_waypoint_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed3/reacher_nmr_waypoint_sac_her_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed3/reacher_nmr_waypoint_sac_her_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_mr_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_mr_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_nmr_jump_2_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_nmr_jump_2_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_nmr_jump_2_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_nmr_jump_2_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_nmr_jump_3_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_nmr_jump_3_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_nmr_jump_3_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_nmr_jump_3_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_nmr_waypoint_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_nmr_waypoint_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_nmr_waypoint_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_nmr_waypoint_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed4/reacher_nmr_waypoint_sac_her_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed4/reacher_nmr_waypoint_sac_her_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-1_annealing.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-1_annealing.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-1_ema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-1_ema.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_mr_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_mr_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_nmr_jump_2_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_nmr_jump_2_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_nmr_jump_2_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_nmr_jump_2_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_nmr_jump_3_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_nmr_jump_3_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_nmr_jump_3_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_nmr_jump_3_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_nmr_waypoint_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_nmr_waypoint_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_nmr_waypoint_sac_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_nmr_waypoint_sac_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_1/seed5/reacher_nmr_waypoint_sac_her_256_256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_1/seed5/reacher_nmr_waypoint_sac_her_256_256.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed1/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed2/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed3/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed4/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_2/seed5/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed1/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed2/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed3/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed4/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_1e-1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_1e-1.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_1e-1_2e7steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_1e-1_2e7steps.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_1e-2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_1e-2.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_1e-3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_1e-3.json -------------------------------------------------------------------------------- /exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_1e0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/iter_3/seed5/reacher_256_256_kl_1e0.json -------------------------------------------------------------------------------- /exp_on_panda/configs/load_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/configs/load_config.py -------------------------------------------------------------------------------- /exp_on_panda/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/evaluate.py -------------------------------------------------------------------------------- /exp_on_panda/models/sb3_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/models/sb3_model.py -------------------------------------------------------------------------------- /exp_on_panda/my_reach_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/my_reach_env.py -------------------------------------------------------------------------------- /exp_on_panda/rollout/discoutinuity/rollout_my_reach_by_rl_bc.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/rollout/discoutinuity/rollout_my_reach_by_rl_bc.ipynb -------------------------------------------------------------------------------- /exp_on_panda/rollout/rollout_by_pid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/rollout/rollout_by_pid.py -------------------------------------------------------------------------------- /exp_on_panda/rollout/rollout_by_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/rollout/rollout_by_policy.py -------------------------------------------------------------------------------- /exp_on_panda/rollout/rollout_my_reach_by_bc.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/rollout/rollout_my_reach_by_bc.ipynb -------------------------------------------------------------------------------- /exp_on_panda/rollout/rollout_my_reach_by_pid.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/rollout/rollout_my_reach_by_pid.ipynb -------------------------------------------------------------------------------- /exp_on_panda/rollout/rollout_my_reach_by_rl_bc.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/rollout/rollout_my_reach_by_rl_bc.ipynb -------------------------------------------------------------------------------- /exp_on_panda/rollout/rollout_my_reach_waypoint_by_pid.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/rollout/rollout_my_reach_waypoint_by_pid.ipynb -------------------------------------------------------------------------------- /exp_on_panda/rollout/rollout_reach_by_pid.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/rollout/rollout_reach_by_pid.ipynb -------------------------------------------------------------------------------- /exp_on_panda/rollout/utils/calc_statistics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/rollout/utils/calc_statistics.py -------------------------------------------------------------------------------- /exp_on_panda/sb3_bc_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/sb3_bc_train.py -------------------------------------------------------------------------------- /exp_on_panda/sb3_rl_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/sb3_rl_train.py -------------------------------------------------------------------------------- /exp_on_panda/sb3_rl_train_after_bc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/sb3_rl_train_after_bc.py -------------------------------------------------------------------------------- /exp_on_panda/sb3_rl_train_after_bc_with_ema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/sb3_rl_train_after_bc_with_ema.py -------------------------------------------------------------------------------- /exp_on_panda/sb3_rl_train_after_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/sb3_rl_train_after_rl.py -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_bc/iter_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_bc/iter_1.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_bc/iter_2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_bc/iter_2.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_bc/iter_3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_bc/iter_3.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl/iter_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl/iter_1.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_0.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e-1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e-1.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e-1_annealing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e-1_annealing.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e-1_ema.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e-1_ema.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e-2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e-2.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e-3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e-3.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_1_kl_1e0.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_2_kl_0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_2_kl_0.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_2_kl_1e-1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_2_kl_1e-1.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_2_kl_1e-2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_2_kl_1e-2.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_2_kl_1e-3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_2_kl_1e-3.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_2_kl_1e0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_2_kl_1e0.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_3_kl_0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_3_kl_0.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_bc/iter_3_kl_1e-1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_bc/iter_3_kl_1e-1.sh -------------------------------------------------------------------------------- /exp_on_panda/scripts/train_rl_after_rl/iter2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/scripts/train_rl_after_rl/iter2.sh -------------------------------------------------------------------------------- /exp_on_panda/train_with_rl_sac_her.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/train_with_rl_sac_her.py -------------------------------------------------------------------------------- /exp_on_panda/utils/load_data.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/load_data.ipynb -------------------------------------------------------------------------------- /exp_on_panda/utils/load_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/load_data.py -------------------------------------------------------------------------------- /exp_on_panda/utils/load_data_with_dones_and_next_obs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/load_data_with_dones_and_next_obs.py -------------------------------------------------------------------------------- /exp_on_panda/utils/register_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/register_env.py -------------------------------------------------------------------------------- /exp_on_panda/utils/sb3_callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/sb3_callbacks.py -------------------------------------------------------------------------------- /exp_on_panda/utils/sb3_env_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/sb3_env_utils.py -------------------------------------------------------------------------------- /exp_on_panda/utils/sb3_env_wrappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/sb3_env_wrappers.py -------------------------------------------------------------------------------- /exp_on_panda/utils/sb3_eval_callback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/sb3_eval_callback.py -------------------------------------------------------------------------------- /exp_on_panda/utils/sb3_evaluate_kl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/sb3_evaluate_kl.py -------------------------------------------------------------------------------- /exp_on_panda/utils/sb3_evaluate_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/sb3_evaluate_policy.py -------------------------------------------------------------------------------- /exp_on_panda/utils/sb3_schedule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/sb3_schedule.py -------------------------------------------------------------------------------- /exp_on_panda/utils/test/test_nmr_waypoint_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/exp_on_panda/utils/test/test_nmr_waypoint_wrapper.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/requirements.txt -------------------------------------------------------------------------------- /train_scripts/test/test_bc_with_dict_obs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/train_scripts/test/test_bc_with_dict_obs.py -------------------------------------------------------------------------------- /train_scripts/test/test_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/train_scripts/test/test_policy.py -------------------------------------------------------------------------------- /train_scripts/train_with_bc_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/train_scripts/train_with_bc_ppo.py -------------------------------------------------------------------------------- /train_scripts/train_with_rl_bc_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/train_scripts/train_with_rl_bc_ppo.py -------------------------------------------------------------------------------- /train_scripts/train_with_rl_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/train_scripts/train_with_rl_ppo.py -------------------------------------------------------------------------------- /utils_my/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils_my/models/ppo_with_bc_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/utils_my/models/ppo_with_bc_loss.py -------------------------------------------------------------------------------- /utils_my/sb3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils_my/sb3/my_eval_callback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/utils_my/sb3/my_eval_callback.py -------------------------------------------------------------------------------- /utils_my/sb3/my_evaluate_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/utils_my/sb3/my_evaluate_policy.py -------------------------------------------------------------------------------- /utils_my/sb3/my_schedule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/utils_my/sb3/my_schedule.py -------------------------------------------------------------------------------- /utils_my/sb3/my_vec_frame_stack_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/utils_my/sb3/my_vec_frame_stack_env.py -------------------------------------------------------------------------------- /utils_my/sb3/my_wrappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/utils_my/sb3/my_wrappers.py -------------------------------------------------------------------------------- /utils_my/sb3/test.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/utils_my/sb3/test.ipynb -------------------------------------------------------------------------------- /utils_my/sb3/vec_env_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/utils_my/sb3/vec_env_helper.py -------------------------------------------------------------------------------- /utils_my/scalar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GongXudong/IRPO/HEAD/utils_my/scalar.py --------------------------------------------------------------------------------