├── abr-puffer
│   ├── bayes_opt
│   │   ├── env
│   │   │   ├── __init__.py
│   │   │   ├── csim_mlp.py
│   │   │   ├── bba.py
│   │   │   └── bola.py
│   │   ├── utils.py
│   │   ├── create_dataset.py
│   │   └── plot_paretos.py
│   ├── assets
│   │   ├── 2020-07-27to2021-06-01_linear_bba_SL_trained_models
│   │   │   └── 10000_predictor.pth
│   │   ├── 2020-07-27to2021-06-01_bola_basic_v1_SL_trained_models
│   │   │   └── 10000_predictor.pth
│   │   ├── 2020-07-27to2021-06-01_bola_basic_v2_SL_trained_models
│   │   │   └── 10000_predictor.pth
│   │   ├── 2020-07-27to2021-06-01_linear_bba_trained_models
│   │   │   └── inner_loop_10
│   │   │       ├── C_0.1
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_0.5
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_1.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_5.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_0.05
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_10.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_15.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_20.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_25.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_30.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       └── C_40.0
│   │   │           ├── 5000_predictor.pth
│   │   │           ├── 5000_discriminator.pth
│   │   │           └── 5000_feature_extractor.pth
│   │   ├── 2020-07-27to2021-06-01_bola_basic_v1_trained_models
│   │   │   └── inner_loop_10
│   │   │       ├── C_0.05
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_0.1
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_0.5
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_1.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_10.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_15.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_20.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_25.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_30.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       ├── C_40.0
│   │   │       │   ├── 5000_predictor.pth
│   │   │       │   ├── 5000_discriminator.pth
│   │   │       │   └── 5000_feature_extractor.pth
│   │   │       └── C_5.0
│   │   │           ├── 5000_predictor.pth
│   │   │           ├── 5000_discriminator.pth
│   │   │           └── 5000_feature_extractor.pth
│   │   └── 2020-07-27to2021-06-01_bola_basic_v2_trained_models
│   │       └── inner_loop_10
│   │           ├── C_0.05
│   │           │   ├── 5000_predictor.pth
│   │           │   ├── 5000_discriminator.pth
│   │           │   └── 5000_feature_extractor.pth
│   │           ├── C_0.1
│   │           │   ├── 5000_predictor.pth
│   │           │   ├── 5000_discriminator.pth
│   │           │   └── 5000_feature_extractor.pth
│   │           ├── C_0.5
│   │           │   ├── 5000_predictor.pth
│   │           │   ├── 5000_discriminator.pth
│   │           │   └── 5000_feature_extractor.pth
│   │           ├── C_1.0
│   │           │   ├── 5000_predictor.pth
│   │           │   ├── 5000_discriminator.pth
│   │           │   └── 5000_feature_extractor.pth
│   │           ├── C_10.0
│   │           │   ├── 5000_predictor.pth
│   │           │   ├── 5000_discriminator.pth
│   │           │   └── 5000_feature_extractor.pth
│   │           ├── C_15.0
│   │           │   ├── 5000_predictor.pth
│   │           │   ├── 5000_discriminator.pth
│   │           │   └── 5000_feature_extractor.pth
│   │           ├── C_20.0
│   │           │   ├── 5000_predictor.pth
│   │           │   ├── 5000_discriminator.pth
│   │           │   └── 5000_feature_extractor.pth
│   │           ├── C_25.0
│   │           │   ├── 5000_predictor.pth
│   │           │   ├── 5000_discriminator.pth
│   │           │   └── 5000_feature_extractor.pth
│   │           ├── C_30.0
│   │           │   ├── 5000_predictor.pth
│   │           │   ├── 5000_discriminator.pth
│   │           │   └── 5000_feature_extractor.pth
│   │           ├── C_40.0
│   │           │   ├── 5000_predictor.pth
│   │           │   ├── 5000_discriminator.pth
│   │           │   └── 5000_feature_extractor.pth
│   │           └── C_5.0
│   │               ├── 5000_predictor.pth
│   │               ├── 5000_discriminator.pth
│   │               └── 5000_feature_extractor.pth
│   ├── data_preparation
│   │   ├── istarmap.py
│   │   └── generate_subset_data.py
│   ├── analysis
│   │   ├── tune_downloadtime_hyperparams.py
│   │   ├── tune_buffer_hyperparams.py
│   │   ├── sl_subset_stall.py
│   │   ├── sl_subset_ssim.py
│   │   ├── subset_ssim.py
│   │   ├── subset_stall.py
│   │   ├── original_subset_ssim.py
│   │   └── original_subset_stall.py
│   ├── visualization
│   │   ├── fig7a.py
│   │   ├── fig4.py
│   │   └── 7b.py
│   ├── training
│   │   ├── sl_subset_train.py
│   │   └── train_subset.py
│   ├── inference
│   │   └── extract_subset_latents.py
│   └── README.md
├── abr-synthetic
│   ├── cpolicies
│   │   ├── __init__.py
│   │   ├── Makefile
│   │   ├── setup.py
│   │   └── mpc.pyx
│   ├── env
│   │   ├── __init__.py
│   │   ├── video_sizes.npy
│   │   ├── trace_loader.py
│   │   └── abr.py
│   ├── nn_util.py
│   ├── README.md
│   ├── slsim.py
│   ├── generate_cfs.py
│   ├── create_dataset_and_expertsim.py
│   ├── generate_traces.py
│   └── causalsim.py
├── .gitignore
├── load-balancing
│   ├── nn_util.py
│   ├── generate_cfs.py
│   ├── README.md
│   ├── slsim.py
│   ├── main_slsim.py
│   ├── causalsim.py
│   └── latency_util.py
└── Readme.md

--------------------------------------------------------------------------------
/abr-puffer/bayes_opt/env/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/abr-synthetic/cpolicies/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/abr-synthetic/env/__init__.py:
--------------------------------------------------------------------------------
from env.abr import ABRSimEnv
--------------------------------------------------------------------------------
/abr-synthetic/env/video_sizes.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CausalSim/Unbiased-Trace-Driven-Simulation/HEAD/abr-synthetic/env/video_sizes.npy
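
Every `.pth` checkpoint under `abr-puffer/assets` (and `video_sizes.npy` above) is a binary blob; in this dump each one resolves to a `raw.githubusercontent.com` URL following the single pattern shown. A minimal fetch sketch using only the standard library; the checkpoint path is one example taken from the tree, and any other asset path can be substituted:

import os
import urllib.request

# Base URL pattern used for every binary asset in this repository dump.
BASE = 'https://raw.githubusercontent.com/CausalSim/Unbiased-Trace-Driven-Simulation/HEAD/'
# Example asset path from the tree above; swap in any other .pth/.npy path.
REPO_PATH = ('abr-puffer/assets/2020-07-27to2021-06-01_linear_bba_SL_trained_models/'
             '10000_predictor.pth')

os.makedirs(os.path.dirname(REPO_PATH), exist_ok=True)
urllib.request.urlretrieve(BASE + REPO_PATH, REPO_PATH)
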
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
abr-synthetic/__pycache__/*
abr-synthetic/cfs/*
abr-synthetic/env/*
abr-synthetic/directory/*
abr-synthetic/cpolicies/build/*
abr-synthetic/results.ipynb
load-balancing/results.ipynb
load-balancing/__pycache__/*
load-balancing/cfs/*
load-balancing/directory/*
--------------------------------------------------------------------------------
/abr-synthetic/cpolicies/Makefile:
--------------------------------------------------------------------------------
all: clean build

default: build

############### FILES #################
FILES=mpc.pyx

############## COMMANDS ###############
build: ${FILES}
	python3 setup.py build_ext -i

clean:
	rm -rf build
	rm -f *.cpython*.so mpc.cpp
#######################################
--------------------------------------------------------------------------------
/abr-puffer/bayes_opt/env/csim_mlp.py:
--------------------------------------------------------------------------------
from torch import nn


class MLP(nn.Module):
    """Fully-connected network: hidden Linear + activation blocks, linear output."""

    def __init__(self, input_dim, output_dim, hidden_sizes, activation):
        super().__init__()
        sizes = [input_dim] + list(hidden_sizes) + [output_dim]
        layers = []
        for j in range(len(sizes) - 1):
            # Hidden layers use the given activation; the output layer is linear.
            act = activation if j < len(sizes) - 2 else nn.Identity
            layers += [nn.Linear(sizes[j], sizes[j + 1]), act()]
        self.predict = nn.Sequential(*layers)

    def forward(self, raw_input):
        prediction = self.predict(raw_input)
        return prediction
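
A quick sanity check of the `MLP` above, assuming only what the constructor signature shows; the dimensions and hidden sizes here are illustrative, not the ones CausalSim trains with:

import torch
from torch import nn

# Two hidden ReLU layers; the output layer stays linear (nn.Identity).
net = MLP(input_dim=10, output_dim=3, hidden_sizes=(128, 128), activation=nn.ReLU)
batch = torch.randn(32, 10)   # 32 samples, 10 features each
out = net(batch)              # -> shape (32, 3)
assert out.shape == (32, 3)
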
--------------------------------------------------------------------------------
/abr-synthetic/nn_util.py:
--------------------------------------------------------------------------------
import torch.nn as nn


def mlp(sizes, activation, output_activation=nn.Identity):
    layers = []
    for j in range(len(sizes) - 1):
        act = activation if j < len(sizes) - 2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j + 1]), act()]
    return nn.Sequential(*layers)


class MLP(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_sizes, activation):
        super().__init__()
        self.predict = mlp(
            sizes=[input_dim] + list(hidden_sizes) + [output_dim],
            activation=activation,
            output_activation=nn.Identity,
        )

    def forward(self, raw_input):
        prediction = self.predict(raw_input)
        return prediction
--------------------------------------------------------------------------------
/load-balancing/nn_util.py:
--------------------------------------------------------------------------------
import torch.nn as nn


def mlp(sizes, activation, output_activation=nn.Identity):
    layers = []
    for j in range(len(sizes) - 1):
        act = activation if j < len(sizes) - 2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j + 1]), act()]
    return nn.Sequential(*layers)


class MLP(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_sizes, activation):
        super().__init__()
        self.predict = mlp(
            sizes=[input_dim] + list(hidden_sizes) + [output_dim],
            activation=activation,
            output_activation=nn.Identity,
        )

    def forward(self, raw_input):
        prediction = self.predict(raw_input)
        return prediction
--------------------------------------------------------------------------------
/abr-puffer/bayes_opt/utils.py:
--------------------------------------------------------------------------------
from termcolor import colored
import pickle
import os
import torch


def set_omp_thrs(num: int = 1):
    assert num > 0
    assert num <= 256
    os.environ['OMP_NUM_THREADS'] = f'{num}'
    torch.set_num_threads(num)


def save_args(config, path: str):
    args_dict = vars(config)
    with open(f'{path}/args.pkl', 'wb') as handle:
        pickle.dump(args_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)


def make_folders(output_folder: str):
    os.makedirs(output_folder, exist_ok=True)
    exit_run = False

    if os.path.exists(f'{output_folder}/rewards_train.npy'):
        exit_run = True

    if exit_run:
        print(colored('Results already exist in output folder', 'red'))
        print(colored('Possibility of overwrite, exiting', 'red'))
        exit(1)
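
A hypothetical driver showing how these helpers compose; the output folder name and the one-flag argparse config are placeholders, not CausalSim's actual entry point:

import argparse

# Parse an (empty) command line to mimic a training script's config object.
parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int, default=0)
config = parser.parse_args([])

set_omp_thrs(1)                # pin OpenMP/PyTorch to one thread, e.g. per worker
make_folders('demo_run')       # exits early if demo_run/rewards_train.npy exists
save_args(config, 'demo_run')  # snapshot the config as demo_run/args.pkl
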
task_batches), 22 | result._set_length 23 | )) 24 | return (item for chunk in result for item in chunk) 25 | 26 | 27 | mpp.Pool.istarmap = istarmap -------------------------------------------------------------------------------- /abr-synthetic/cpolicies/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from Cython.Build import cythonize 3 | import numpy 4 | from setuptools.command.build_ext import build_ext 5 | 6 | 7 | # Avoid a gcc warning below: 8 | # cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid 9 | # for C/ObjC but not for C++ 10 | class BuildExtDebug(build_ext): 11 | def build_extensions(self): 12 | if '-Wstrict-prototypes' in self.compiler.compiler_so: 13 | self.compiler.compiler_so.remove('-Wstrict-prototypes') 14 | super().build_extensions() 15 | 16 | 17 | extension = cythonize( 18 | [ 19 | Extension('mpc', 20 | ['mpc.pyx'], 21 | language="c++", 22 | extra_compile_args=["-std=c++14"], 23 | include_dirs=[numpy.get_include()] 24 | ) 25 | ], 26 | compiler_directives={'language_level': "3"} 27 | ) 28 | 29 | setup( 30 | # Information 31 | name="cpol", 32 | ext_modules=extension, 33 | cmdclass={'build_ext': BuildExtDebug} 34 | ) 35 | -------------------------------------------------------------------------------- /abr-puffer/bayes_opt/env/bba.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .abr import ABRSimEnv 4 | 5 | 6 | class BBA(object): 7 | MAX_BUF_S = ABRSimEnv.MAX_BUFFER_S 8 | 9 | def __init__(self, act_len: int, reservoir: float, cushion: float): 10 | self.act_n = act_len 11 | self.upper = reservoir + cushion 12 | self.lower = reservoir 13 | 14 | def sample_action(self, obs: np.ndarray) -> int: 15 | invalid_mask = np.logical_or(np.isnan(obs[-2 * self.act_n: -self.act_n]), 16 | np.isnan(obs[-self.act_n:])) 17 | size_arr_valid = np.ma.array(obs[-2 * self.act_n: -self.act_n], mask=invalid_mask) 18 | ssim_arr_valid = np.ma.array(obs[-self.act_n:], mask=invalid_mask) 19 | min_choice = size_arr_valid.argmin() 20 | max_choice = size_arr_valid.argmax() 21 | buffer = (obs[0] + 1) / 2 * self.MAX_BUF_S 22 | if buffer < self.lower: 23 | act = min_choice 24 | elif buffer >= self.upper: 25 | act = max_choice 26 | else: 27 | ratio = (buffer - self.lower) / float(self.upper - self.lower) 28 | min_chunk = size_arr_valid[min_choice] 29 | max_chunk = size_arr_valid[max_choice] 30 | bitrate = ratio * (max_chunk - min_chunk) + min_chunk 31 | mask = np.logical_or(invalid_mask, size_arr_valid > bitrate) 32 | act = np.ma.array(ssim_arr_valid, mask=mask).argmax() 33 | return act 34 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # CausalSim: A Causal Framework for Unbiased Trace-Driven Simulation 2 | We present CausalSim, a causal framework for unbiased trace-driven simulation. Current trace-driven simulators assume that the interventions being simulated (e.g., a new algorithm) would not affect the validity of the traces. However, real-world traces are often biased by the choices algorithms make during trace collection, and hence replaying traces under an intervention may lead to incorrect results. CausalSim addresses this challenge by learning a causal model of the system dynamics and latent factors capturing the underlying system conditions during trace collection. 
It learns these models using an initial randomized controlled trial (RCT) under a fixed set of algorithms, and then applies them to remove biases from trace data when simulating new algorithms. 3 | 4 | Key to CausalSim is mapping unbiased trace-driven simulation to a tensor completion problem with extremely sparse observations. By exploiting a basic distributional invariance property present in RCT data, CausalSim enables a novel tensor completion method despite the sparsity of observations. Our extensive evaluation of CausalSim on both real and synthetic datasets, including more than ten months of real data from the Puffer video streaming system, shows that it improves simulation accuracy, reducing errors by 53% and 61% on average compared to expert-designed and supervised learning baselines. Moreover, CausalSim provides markedly different insights about ABR algorithms compared to the biased baseline simulator, which we validate with a real deployment. 5 | -------------------------------------------------------------------------------- /abr-puffer/analysis/tune_downloadtime_hyperparams.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import pickle 4 | import os 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("--dir", help="source directory") 8 | args = parser.parse_args() 9 | policy_names = ['bola_basic_v2', 'bola_basic_v1', 'puffer_ttp_cl', 'puffer_ttp_20190202', 'linear_bba'] 10 | buffer_based_names = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 11 | downloadtime_hyperparams = {policy: [] for policy in buffer_based_names} 12 | 13 | C_list = ['0.05', '0.1', '0.5', '1.0', '5.0', '10.0', '15.0', '20.0', '25.0', '30.0', '40.0'] 14 | 15 | for left_out_policy in buffer_based_names: 16 | sim_data = {C: {policy: [] for policy in buffer_based_names} for C in C_list} 17 | with open(f'{args.dir}subset_orig_rebuff_dicts/{left_out_policy}/orig_rebuffs.pkl', 'rb') as f: 18 | orig_dict = pickle.load(f) 19 | for C in C_list: 20 | with open(f'{args.dir}subset_stall_dicts/{left_out_policy}/stalls_{C}.pkl', 'rb') as f: 21 | sim_dict = pickle.load(f) 22 | for policy in buffer_based_names: 23 | sim_data[C][policy].append(100 * abs(np.sum(sim_dict[policy]['rebuffs']) / np.sum(sim_dict[policy]['lens']) - np.sum(orig_dict[policy]['rebuffs']) / np.sum(orig_dict[policy]['lens']))) 24 | downloadtime_hyperparams[left_out_policy] = [C_list[np.argmin( 25 | [np.mean([sim_data[C][policy] for policy in buffer_based_names if policy != left_out_policy]) for C in C_list])]] 26 | 27 | hyperparam_path = f'{args.dir}tuned_hyperparams' 28 | os.makedirs(hyperparam_path, exist_ok=True) 29 | with open(f'{hyperparam_path}/downloadtime.pkl', 'wb') as f: 30 | pickle.dump(downloadtime_hyperparams, f) 31 | -------------------------------------------------------------------------------- /abr-puffer/visualization/fig7a.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pickle 3 | import numpy as np 4 | import argparse 5 | import os 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--dir", help="root directory") 9 | args = parser.parse_args() 10 | 11 | policy_names = ['bola_basic_v2', 'bola_basic_v1', 'puffer_ttp_cl', 'puffer_ttp_20190202', 'linear_bba'] 12 | buffer_based_names = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 13 | sl_EMDs, expert_EMDs, sim_EMDs = [], [], [] 14 | with open(f'{args.dir}tuned_hyperparams/buffer.pkl', 'rb') as f:
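# note: the handle `f` is rebound on the next line to the unpickled dict of tuned C values; it works, but a distinct name would read more clearly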
15 | f = pickle.load(f) 16 | bf_C = {policy: f[policy][1] for policy in buffer_based_names} 17 | for left_out_policy in buffer_based_names: 18 | with open(f'{args.dir}subset_EMDs/{left_out_policy}/sim_buff_{bf_C[left_out_policy]}.pkl', 'rb') as f: 19 | sim_dict = pickle.load(f) 20 | with open(f'{args.dir}subset_EMDs/{left_out_policy}/expert_buff_{bf_C[left_out_policy]}.pkl', 'rb') as f: 21 | expert_dict = pickle.load(f) 22 | with open(f'{args.dir}subset_EMDs/{left_out_policy}/sl_buff_{bf_C[left_out_policy]}.pkl', 'rb') as f: 23 | sl_dict = pickle.load(f) 24 | sl_EMDs.extend([sl_dict[source][left_out_policy] for source in policy_names if source != left_out_policy]) 25 | expert_EMDs.extend([expert_dict[source][left_out_policy] for source in policy_names if source != left_out_policy]) 26 | sim_EMDs.extend([sim_dict[source][left_out_policy] for source in policy_names if source != left_out_policy]) 27 | plt.figure(figsize=(3.25, 2.25)) 28 | sl_EMDs = np.sort(sl_EMDs) 29 | expert_EMDs = np.sort(expert_EMDs) 30 | sim_EMDs = np.sort(sim_EMDs) 31 | plt.plot(sl_EMDs, [i/12*100 for i in range(1, 13)], label='SLSim') 32 | plt.plot(expert_EMDs, [i/12*100 for i in range(1, 13)], label='ExpertSim') 33 | plt.plot(sim_EMDs, [i/12*100 for i in range(1, 13)], label='CausalSim') 34 | plt.legend() 35 | plt.ylabel('CDF %') 36 | plt.xlabel('EMD') 37 | 38 | fig_path = f'{args.dir}plots' 39 | os.makedirs(fig_path, exist_ok=True) 40 | plt.savefig(f'{fig_path}/fig7a.pdf', format='pdf') -------------------------------------------------------------------------------- /load-balancing/generate_cfs.py: -------------------------------------------------------------------------------- 1 | from latency_util import * 2 | 3 | 4 | def generate_cfs( 5 | dict_exp, 6 | training_datapath, 7 | models_path, 8 | test_policy_idx, 9 | alg="causalsim", 10 | N_test=5000, 11 | r=None, 12 | ): 13 | 14 | job_size = dict_exp["job_size"][:N_test, :] 15 | inter_arrs = dict_exp["ia_time"][:N_test, :] 16 | 17 | pts = np.load(f"{training_datapath}/raw_train_pt.npy") 18 | pt_mean = 0 19 | pt_std = np.std(pts) 20 | 21 | train_trajectories = np.load( 22 | f"{training_datapath}/train_trajectories.npy", allow_pickle=True 23 | ) 24 | 25 | actions_obs = train_trajectories[:N_test, :, 0] 26 | ptimes_obs = train_trajectories[:N_test, :, 1] 27 | if alg == "causalsim": 28 | assert r is not None 29 | 30 | feature_extractor = torch.load( 31 | "%sbest_feature_extractor.pth" % models_path, 32 | map_location=torch.device("cpu"), 33 | ) 34 | action_factor = torch.load( 35 | "%sbest_action_factor.pth" % models_path, map_location=torch.device("cpu") 36 | ) 37 | 38 | features, actions, proc_times, latencies = collect_traces_sim_traj_fact( 39 | job_size, 40 | inter_arrs, 41 | ptimes_obs, 42 | actions_obs, 43 | feature_extractor, 44 | action_factor, 45 | r, 46 | pt_mean, 47 | pt_std, 48 | test_pol_idx=test_policy_idx, 49 | p_change=0, 50 | ) 51 | elif alg == "slsim": 52 | buffer_predictor = torch.load( 53 | "%sbest_buffer_predictor.pth" % models_path, 54 | map_location=torch.device("cpu"), 55 | ) 56 | actions, proc_times, latencies = collect_traces_direct_traj( 57 | job_size, 58 | inter_arrs, 59 | ptimes_obs, 60 | buffer_predictor, 61 | pt_mean, 62 | pt_std, 63 | test_pol_idx=test_policy_idx, 64 | p_change=0, 65 | ) 66 | features = None 67 | else: 68 | raise ValueError(f"Unknown value {alg} for alg") 69 | 70 | cf = np.zeros([8, N_test, 1000, 3]) 71 | cf[:, :, :, 0] = proc_times 72 | cf[:, :, :, 1] = latencies 73 | cf[:, :, :, 2] = actions 74 | 75 | return cf, features 
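# (cf above is a counterfactual tensor of shape [8, N_test, 1000, 3]; per the assignments before the return, the last axis holds (proc_time, latency, action))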
76 | -------------------------------------------------------------------------------- /abr-synthetic/README.md: -------------------------------------------------------------------------------- 1 | # Synthetic Adaptive BitRate (ABR) simulation with CausalSim 2 | 3 | To reproduce the results in the paper, we need to: 4 | 5 | 0. Install required Python packages 6 | 1. Create the dataset and ExpertSim trajectories (~14GB). 7 | 2. Train, Infer and Tune CausalSim models, and plot results. 8 | 9 | --- 10 | ## 0. Python packages 11 | We use Python (3.8 tested) for all experiments. Install the following packages via `pip` or `conda`: 12 | ``` 13 | numpy, scipy, tqdm, matplotlib, scikit-learn, cython 14 | ``` 15 | Install PyTorch according to the website [instructions](https://pytorch.org). 16 | 17 | --- 18 | ## 1. Preparing the dataset 19 | 20 | Begin by compiling a fast Cython-based MPC implementation: 21 | ``` 22 | cd cpolicies 23 | make all 24 | cd .. 25 | ``` 26 | 27 | Then, create a directory as a workspace for datasets, models, simulations and plots. We'll call this directory CAUSALSIM_DIR. 28 | Next, run the following command: 29 | ``` 30 | python3 create_dataset_and_expertsim.py --dir CAUSALSIM_DIR 31 | ``` 32 | This script will create traces and stream logs for a live-streaming session (the maximum buffer is limited to 10 seconds), saved in `CAUSALSIM_DIR`. 33 | 34 | ## 2. Train/Infer/Tune/Plot 35 | 36 | For the synthetic ABR experiment, we generate counterfactual predictions for a synthetic ABR environment for one of the following policies: 37 | 38 | 1. **BBA**. 39 | 2. **BBAMIX-x1-50**. 40 | 3. **BBAMIX-x2-50**. 41 | 4. **MPC**. 42 | 5. **Random**. 43 | 6. **BOLA**. 44 | 7. **Rate Based**. 45 | 8. **Optimistic Rate Based**. 46 | 9. **Pessimistic Rate Based**. 47 | 48 | To run the experiment and generate the counterfactuals of policy `POLICY_NAME` (while leaving that policy out of the training data), run: 49 | 50 | 51 | ``` 52 | 53 | python3 main.py --policy_out POLICY_NAME --dir CAUSALSIM_DIR --slsim_loss mse_loss 54 | 55 | ``` 56 | 57 | where `POLICY_NAME` can be any one of { 58 | "BBA", 59 | "BBAMIX-x1-50", 60 | "BBAMIX-x2-50", 61 | "MPC", 62 | "Random", 63 | "BOLA", 64 | "Rate Based", 65 | "Optimistic Rate Based", 66 | "Pessimistic Rate Based" 67 | }. 68 | 69 | and `--slsim_loss` should be one of `mse_loss`, `l1_loss`, or `huber_loss`. 70 | 71 | This will produce a plot (saved in `figures/`) showing the MSE of estimating the counterfactual buffer occupancy for both CausalSim and SLSim.
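To sweep the full leave-one-out experiment rather than a single policy, a small driver along these lines can be used (an illustrative sketch that simply shells out to the `main.py` entry point referenced above, with the same flags; replace CAUSALSIM_DIR with your workspace path):
```
import subprocess

POLICIES = ["BBA", "BBAMIX-x1-50", "BBAMIX-x2-50", "MPC", "Random", "BOLA",
            "Rate Based", "Optimistic Rate Based", "Pessimistic Rate Based"]

for policy in POLICIES:
    # one full train/infer/tune/plot run per left-out policy
    subprocess.run(["python3", "main.py", "--policy_out", policy,
                    "--dir", "CAUSALSIM_DIR", "--slsim_loss", "mse_loss"], check=True)
```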
-------------------------------------------------------------------------------- /abr-synthetic/env/trace_loader.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List 2 | import numpy as np 3 | from tqdm import trange 4 | from scipy.optimize import fsolve 5 | 6 | 7 | def load_traces(trace_folder: str, seed: int, length_trace: int, num_traces: int) -> Tuple[List[np.ndarray], 8 | np.ndarray]: 9 | all_traces, all_rtts = load_sim_traces_process(length=length_trace, seed=seed, num_traces=num_traces) 10 | np.save(trace_folder + '/traces.npy', all_traces) 11 | np.save(trace_folder + '/rtts.npy', all_rtts) 12 | 13 | return all_traces, all_rtts 14 | 15 | 16 | def load_sim_traces_process(length: int, num_traces: int, seed: int) -> Tuple[List[np.ndarray], np.ndarray]: 17 | rng = np.random.RandomState(seed) 18 | all_traces = [] 19 | print('Creating traces') 20 | for i in trange(num_traces): 21 | p_transition = 1 - 1 / rng.randint(30, 100) 22 | var_coeff = rng.random() * 0.25 + 0.05 23 | low_thresh, high_thresh = uniform_thresh(4.5, 0.5, rng) 24 | all_bandwidth = np.empty(length) 25 | state = rng.random() * (high_thresh-low_thresh) + low_thresh 26 | for j in range(length): 27 | all_bandwidth[j] = np.clip(rng.normal(state, state * var_coeff), low_thresh, high_thresh) 28 | if rng.random() > p_transition: 29 | state = doubly_exponential(state, high_thresh, low_thresh, rng) 30 | all_traces.append(all_bandwidth) 31 | 32 | all_rtts = np.random.RandomState(seed).random(size=len(all_traces)) * 490 + 10 33 | print('Created!!!') 34 | 35 | return all_traces, all_rtts 36 | 37 | 38 | def doubly_exponential(position: float, high: float, low: float, rng: np.random.RandomState) -> float: 39 | lamb = fsolve(lambda la: 1 - np.exp(-la * (high-position)) - np.exp(-la * (position-low)), np.array([0.5]))[0] 40 | rnd = rng.random() 41 | if rnd < 1 - np.exp(-lamb * (high-position)): 42 | return position - np.log(1-rnd)/lamb 43 | else: 44 | return position + np.log(rnd) / lamb 45 | 46 | 47 | def uniform_thresh(high: float, low: float, rng: np.random.RandomState) -> Tuple[float, float]: 48 | low_thresh, high_thresh = 1, 1 49 | while (high_thresh-low_thresh) / (high_thresh+low_thresh) < 0.3: 50 | threshes = rng.random(size=2) * (high - low) + low 51 | low_thresh, high_thresh = np.min(threshes), np.max(threshes) 52 | return low_thresh, high_thresh 53 | -------------------------------------------------------------------------------- /abr-synthetic/cpolicies/mpc.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | import cython 3 | from numpy.math cimport INFINITY 4 | import numpy as np 5 | cimport numpy as np 6 | np.import_array() 7 | 8 | 9 | def take_action_py(obs_np, act_n, vid_bit_rate, rebuf_penalty, mpc_lookback, mpc_lookahead, eps): 10 | next_chunks_len = min(mpc_lookahead-1, int(obs_np[2 * mpc_lookback + 1])) 11 | next_chunk_sizes = obs_np[3 + 2 * mpc_lookback:3 + 2 * mpc_lookback + act_n * next_chunks_len] 12 | past_bandwidths = np.trim_zeros(obs_np[:mpc_lookback], 'f') 13 | if len(past_bandwidths) > 0: 14 | harmonic_bandwidth = 1 / (1/past_bandwidths).mean() 15 | else: 16 | harmonic_bandwidth = eps 17 | future_bandwidth = harmonic_bandwidth 18 | return recursive_best_mpc_c(obs_np[2 * mpc_lookback], 0, next_chunks_len, int(obs_np[2 * mpc_lookback + 2]), 19 | next_chunk_sizes / future_bandwidth, act_n, vid_bit_rate, rebuf_penalty)[1] 20 | 21 | 22 | @cython.boundscheck(False) # turn off 
bounds-checking for entire function 23 | @cython.wraparound(False) # turn off negative index wrapping for entire function 24 | cdef recursive_best_mpc_c(double curr_buffer, int position, int recursions_left, int last_quality, 25 | double[:] download_times, int act_n, double[:] vid_bit_rate, 26 | double rebuf_penalty): 27 | 28 | if recursions_left == 0: 29 | assert position * act_n == len(download_times) 30 | return 0, 0 31 | 32 | cdef double best_reward = -INFINITY 33 | cdef int best_act = -1 34 | cdef int chunk_quality = 0 35 | cdef double reward_act 36 | cdef double buffer_act 37 | cdef double download_time 38 | 39 | for chunk_quality in range(act_n): 40 | reward_act = 0 41 | buffer_act = curr_buffer 42 | # this is MB/MB/s --> seconds 43 | download_time = download_times[position * act_n + chunk_quality] 44 | if buffer_act < download_time: 45 | reward_act -= rebuf_penalty * (download_time - buffer_act) 46 | buffer_act = 0 47 | else: 48 | buffer_act -= download_time 49 | buffer_act += 4 50 | reward_act += vid_bit_rate[chunk_quality] 51 | reward_act -= abs(vid_bit_rate[chunk_quality] - vid_bit_rate[last_quality]) 52 | reward_act += recursive_best_mpc_c(buffer_act, position+1, recursions_left-1, chunk_quality, 53 | download_times, act_n, vid_bit_rate, rebuf_penalty)[0] 54 | 55 | if best_reward < reward_act: 56 | best_reward = reward_act 57 | best_act = chunk_quality 58 | 59 | return best_reward, best_act 60 | -------------------------------------------------------------------------------- /load-balancing/README.md: -------------------------------------------------------------------------------- 1 | # Heterogeneous load balancing simulation with CausalSim 2 | 3 | To reproduce the results in the paper, we need to: 4 | 5 | 0. Install required python packages 6 | 1. Create the dataset (~2GB). 7 | 2. Train, Infer and Tune CausalSim models, and plot results. 8 | 9 | --- 10 | ## 0. Python packages 11 | We use Python (3.8 tested) for all experiments. Install the following packages via `pip` or `conda` : 12 | ``` 13 | numpy, tqdm, matplotlib, tensorboard, scikit-learn 14 | ``` 15 | Install PyTorch according to the website [instructions](https://pytorch.org). 16 | 17 | --- 18 | ## 1. Preparing the dataset 19 | 20 | First, create a directory as a workspace for datasets, models, simulations and plots. We'll call this directory CAUSALSIM_DIR. 21 | Next, run the following command: 22 | ``` 23 | python3 create_dataset.py --dir CAUSALSIM_DIR 24 | ``` 25 | This script will create a synthetic dataset for heterogeneous load balancing and save it in `CAUSALSIM_DIR`. 26 | 27 | ## 2. Train/Infer/Tune/Plot 28 | 29 | For the load balancing experiment, we generate counterfactual predictions for a synthetic load balancing environment for one of the following policies: 30 | 31 | 32 | 1. **Random**: a policy that assigns jobs to any server uniformly at random. 33 | 34 | 2. **Shortest Queue**: Assign to the server with the smallest queue. 35 | 36 | 3. **Pow2**: Poll queue lengths of 2 servers (randomly) and assign to shortest queue 37 | 38 | 4. **Pow3**: Poll queue lengths of 3 servers and assign to shortest queue. 39 | 40 | 5. **Pow4**: Poll queue lengths of 4 servers and assign to shortest queue. 41 | 42 | 6. **Pow5**: Poll queue lengths of 5 servers and assign to shortest queue. 43 | 44 | 7. **PseudoOptimal**: Normalize queue sizes with server rates and assign the job to the shortest normalized queue. 45 | 46 | 8. 
**Tracker**: Similar to PseudoOptimal, but estimates server rates with historical observations of processing times. 47 | 48 | 49 | To run the experiment and generate the counterfactuals of policy `POLICY_NAME` (while leaving that policy out of the training data), run: 50 | 51 | 52 | ``` 53 | 54 | python3 main.py --policy_out POLICY_NAME --dir CAUSALSIM_DIR --slsim_loss mse_loss 55 | 56 | ``` 57 | 58 | where `POLICY_NAME` can be any one of { 59 | "random", 60 | "Shortest queue", 61 | "pow2", 62 | "pow3", 63 | "pow4", 64 | "pow5", 65 | "PseudoOptimal", 66 | "Tracker" 67 | }. 68 | 69 | and `--slsim_loss` should be one of `mse_loss`, `l1_loss`, or `huber_loss`. 70 | 71 | This will produce three plots (saved in `figures/`): 72 | 73 | 1. The MAPE of estimating the counterfactual processing times of the jobs under the selected test policy using CausalSim and how it compares with SLSim. 74 | 75 | 2. The MAPE of estimating the counterfactual latencies of the jobs under the selected test policy using CausalSim and how it compares with SLSim. 76 | 77 | 3. CausalSim latent factors and how they compare with the actual job size. 78 | 79 | -------------------------------------------------------------------------------- /abr-puffer/bayes_opt/create_dataset.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import itertools 3 | import os 4 | import shutil 5 | 6 | import numpy as np 7 | from argparse import ArgumentParser 8 | from tqdm import tqdm 9 | 10 | from env.abr import ssim_db 11 | 12 | parser = ArgumentParser(description='Puffer RL dataset parameters') 13 | parser.add_argument('--dir', type=str, required=True, help='Puffer trace path') 14 | parser.add_argument('--buf_latent_dir', type=str, required=True, help='Path to load latent buffers from') 15 | parser.add_argument('--dt_latent_dir', type=str, required=True, help='Path to load download time latents from') 16 | config = parser.parse_args() 17 | 18 | 19 | def main(): 20 | # Each trace is a numpy array with the following: 21 | # 1) Buffer hidden feature, 22 | # 2) DT hidden feature, 23 | # 3) factual throughput, 24 | # 4) time between actions, 25 | # 5) min rtt, 26 | # 6-17) bitrate choices, 27 | # 18-29) ssim of choices in db 28 | 29 | os.makedirs(f"{config.dir}/gp_cooked/", exist_ok=True) 30 | 31 | all_ts = 0 32 | all_wts = [] 33 | 34 | # 235298 traces 35 | start_date = datetime.date(2020, 7, 27) 36 | end_date = datetime.date(2021, 6, 1) 37 | all_days = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)] 38 | 39 | for today in tqdm(all_days): 40 | all_traces = [] 41 | 42 | date_string = "%d-%02d-%02d" % (today.year, today.month, today.day) 43 | trajs = np.load(f"{config.dir}/cooked/{date_string}_trajs.npy", allow_pickle=True) 44 | lat_buf = np.load(f"{config.buf_latent_dir}/{date_string}_features.npy", allow_pickle=True) 45 | lat_dt = np.load(f"{config.dt_latent_dir}/{date_string}_features.npy", allow_pickle=True) 46 | 47 | assert len(trajs) == len(lat_buf) 48 | assert len(trajs) == len(lat_dt) 49 | 50 | for traj, lat_buf_traj, lat_dt_traj in zip(trajs, lat_buf, lat_dt): 51 | assert len(lat_buf_traj) == len(lat_dt_traj) 52 | assert len(traj) > 1 53 | buf_lat_traj = np.array(lat_buf_traj).squeeze() 54 | dt_lat_traj = np.array(lat_dt_traj).squeeze() 55 | assert buf_lat_traj.shape == dt_lat_traj.shape 56 | fact_thr_traj = traj[:-1, 7] / traj[:-1, 6] 57 | act_inter_times_traj = traj[1:, 9] 58 | min_rtt_traj = traj[:-1, 14] / 1000 59 | size_s
= traj[:-1, 16:28] 60 | ssim_s = ssim_db(traj[:-1, 28:40]) 61 | all_traces.append(np.c_[buf_lat_traj, dt_lat_traj, fact_thr_traj, act_inter_times_traj, min_rtt_traj, 62 | size_s, ssim_s]) 63 | all_ts += 1 64 | 65 | watch_time_no_stall = 2.002 * len(traj) - traj[-1, 0] 66 | assert watch_time_no_stall > 0 67 | all_wts.append(watch_time_no_stall) 68 | 69 | np.save(f"{config.dir}/gp_cooked/{date_string}_trc.npy", np.array(all_traces, dtype=object)) 70 | 71 | np.save(f"{config.dir}/gp_cooked/wts.npy", np.array(all_wts)) 72 | print(f"There were {all_ts} traces!!!") 73 | 74 | src_stats = f"{config.dir}/2020-07-27to2021-06-01_no_filter_data/" 75 | stats = ['mean', 'std'] 76 | tags = [ 77 | ('buffs', 'buffer'), 78 | ('chats', 'c_hat'), 79 | ('actions', 'chosen_chunk_size'), 80 | ('dts', 'download_time'), 81 | ('next_buffs', 'next_buffer'), 82 | ] 83 | 84 | for stat, (tag_src, tag_dst) in itertools.product(stats, tags): 85 | shutil.copyfile(f"{src_stats}/{tag_src}_{stat}.npy", f"{config.dir}/gp_cooked/{tag_dst}_{stat}.npy") 86 | 87 | print(f"Copied normalization statistics!!!") 88 | 89 | 90 | if __name__ == '__main__': 91 | main() 92 | -------------------------------------------------------------------------------- /abr-puffer/analysis/tune_buffer_hyperparams.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import pickle 4 | import os 5 | import itertools 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--dir", help="source directory") 9 | args = parser.parse_args() 10 | policy_names = ['bola_basic_v2', 'bola_basic_v1', 'puffer_ttp_cl', 'puffer_ttp_20190202', 'linear_bba'] 11 | buffer_based_names = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 12 | buffer_hyperparams = {policy: [] for policy in buffer_based_names} 13 | 14 | C_list = ['0.05', '0.1', '0.5', '1.0', '5.0', '10.0', '15.0', '20.0', '25.0', '30.0', '40.0'] 15 | 16 | keys = ['buff'] 17 | for left_out_policy in buffer_based_names: 18 | sim_EMDs = {key: {'val': []} for key in keys} 19 | expert_EMDs = {key: {'val': []} for key in keys} 20 | sl_EMDs = {key: {'val': []} for key in keys} 21 | for idx, key in enumerate(keys): 22 | for C in C_list: 23 | with open(f'{args.dir}subset_EMDs/{left_out_policy}/sim_{key}_{C}.pkl', 'rb') as f: 24 | sim_dict = pickle.load(f) 25 | with open(f'{args.dir}subset_EMDs/{left_out_policy}/expert_{key}_{C}.pkl', 'rb') as f: 26 | expert_dict = pickle.load(f) 27 | with open(f'{args.dir}subset_EMDs/{left_out_policy}/sl_{key}_{C}.pkl', 'rb') as f: 28 | sl_dict = pickle.load(f) 29 | sim_EMDs[key]['val'].append(np.mean( 30 | [sim_dict[source][target] for (source, target) in itertools.product(policy_names, buffer_based_names) if 31 | target != left_out_policy and source != left_out_policy])) 32 | expert_EMDs[key]['val'].append(np.mean( 33 | [expert_dict[source][target] for (source, target) in itertools.product(policy_names, buffer_based_names) 34 | if target != left_out_policy and source != left_out_policy])) 35 | sl_EMDs[key]['val'].append(np.mean( 36 | [sl_dict[source][target] for (source, target) in itertools.product(policy_names, buffer_based_names) if 37 | target != left_out_policy and source != left_out_policy])) 38 | 39 | expert_data = {policy: [] for policy in buffer_based_names} 40 | sl_data = {policy: [] for policy in buffer_based_names} 41 | sim_data = {C: {policy: [] for policy in buffer_based_names} for C in C_list} 42 | with open(f'{args.dir}subset_expert_ssim_dicts/{left_out_policy}/expert_ssims.pkl', 
'rb') as f: 43 | expert_ssims = pickle.load(f) 44 | with open(f'{args.dir}subset_sl_ssim_dicts/{left_out_policy}/ssims.pkl', 'rb') as f: 45 | sl_ssims = pickle.load(f) 46 | with open(f'{args.dir}subset_orig_ssim_dicts/{left_out_policy}/orig_ssims.pkl', 'rb') as f: 47 | orig_ssims = pickle.load(f) 48 | for policy in buffer_based_names: 49 | expert_data[policy].append(abs(expert_ssims[policy] - orig_ssims[policy])) 50 | for policy in buffer_based_names: 51 | sl_data[policy].append(abs(sl_ssims[policy] - orig_ssims[policy])) 52 | for C in C_list: 53 | with open(f'{args.dir}subset_ssim_dicts/{left_out_policy}/ssims_{C}.pkl', 'rb') as f: 54 | sim_ssims = pickle.load(f) 55 | for policy in buffer_based_names: 56 | sim_data[C][policy].append(abs(sim_ssims[policy] - orig_ssims[policy])) 57 | buffer_hyperparams[left_out_policy].append(C_list[np.argmin( 58 | [np.mean([sim_data[C][policy] for policy in buffer_based_names if policy != left_out_policy]) for C in C_list])]) 59 | buffer_hyperparams[left_out_policy].append(C_list[np.argmin(sim_EMDs['buff']['val'])]) 60 | 61 | hyperparam_path = f'{args.dir}tuned_hyperparams' 62 | os.makedirs(hyperparam_path, exist_ok=True) 63 | with open(f'{hyperparam_path}/buffer.pkl', 'wb') as f: 64 | pickle.dump(buffer_hyperparams, f) 65 | -------------------------------------------------------------------------------- /abr-puffer/bayes_opt/plot_paretos.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import argparse 4 | from env.abr import ssim_db 5 | 6 | 7 | # Adapted from https://oco-carbon.com/metrics/find-pareto-frontiers-in-python/ 8 | def pareto_frontier(x_s, y_s, map_x=False, map_y=True): 9 | my_list = sorted([[x_s[i], y_s[i]] for i in range(len(x_s))], reverse=map_x) 10 | p_front = [my_list[0]] 11 | for pair in my_list[1:]: 12 | if map_y: 13 | if pair[1] >= p_front[-1][1]: 14 | p_front.append(pair) 15 | else: 16 | if pair[1] <= p_front[-1][1]: 17 | p_front.append(pair) 18 | rem = [] 19 | for start in p_front: 20 | for end in p_front: 21 | for mid in p_front: 22 | if start != end and start != mid and end != mid: 23 | if start[0] < mid[0] < end[0] and \ 24 | (mid[0]-start[0])/(end[0]-start[0])*(end[1]-start[1])+start[1] > mid[1]: 25 | if mid not in rem: 26 | rem.append(mid) 27 | for pair in rem: 28 | p_front.remove(pair) 29 | p_front_x = [pair[0] for pair in p_front] 30 | p_front_y = [pair[1] for pair in p_front] 31 | return np.array(p_front_x), np.array(p_front_y) 32 | 33 | 34 | def main(): 35 | parser = argparse.ArgumentParser(description='parameters') 36 | parser.add_argument('--dir', type=str, required=True, help='Output folder') 37 | parser.add_argument('--sim_mode', type=str, required=True, choices=['causalsim', 'expertsim'], help='Simulator') 38 | parser.add_argument('--policies', type=str, nargs='+', required=True, choices=['bba', 'bola1', 'bola2'], 39 | help='Policies to plot') 40 | parser.add_argument('--annotate_frontier', action='store_true', 41 | help='Annotate the parameters for the frontier as (cushion, reservoir)') 42 | config = parser.parse_args() 43 | 44 | plt.figure(figsize=(14, 10)) 45 | 46 | dict_col = { 47 | 'bba': 'C2', 48 | 'bola1': 'C1', 49 | 'bola2': 'C3', 50 | } 51 | 52 | for pol in config.policies: 53 | run_stats = np.load(f'{config.dir}/tests/gp_{pol}_{config.sim_mode}/run_stats.npy') 54 | print(f'There are {len(run_stats)} points') 55 | s_s = ssim_db(run_stats[:, [3]]) 56 | r_s = run_stats[:, [4]] 57 | accept = np.logical_and(r_s < 0.07, 
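# keep only runs with under 7% of time spent stalled and above 14 dB average SSIM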
s_s > 14) 58 | 59 | x, y = pareto_frontier(r_s[accept], s_s[accept]) 60 | plt.scatter(r_s[accept] * 100, s_s[accept], marker='o', color=dict_col[pol], label=pol) 61 | plt.fill_between(x*100, y, s_s[accept].min(), color=dict_col[pol], alpha=0.1) 62 | plt.plot(x*100, y, color=dict_col[pol]) 63 | 64 | if config.annotate_frontier: 65 | b, t = plt.gca().get_ylim() 66 | r, l = plt.gca().get_xlim() 67 | hd = (t-b)/100 68 | wd = (r-l)/100 69 | for i in range(len(x)): 70 | i_x = np.where((r_s == x[i]) & (s_s == y[i]))[0] 71 | assert len(i_x) == 1 72 | i_x = i_x[0] 73 | plt.annotate(f'({run_stats[i_x, 0]:.1f}, {run_stats[i_x, 1]:.1f})', xy=(x[i]*100+wd, y[i]+hd)) 74 | 75 | plt.legend() 76 | plt.grid() 77 | ax = plt.gca() 78 | ax.spines['right'].set_visible(False) 79 | ax.spines['top'].set_visible(False) 80 | plt.xlabel(r'Time Spent Stalled (%)') 81 | plt.ylabel(r'Average SSIM (dB)') 82 | ax.plot(1, 0, ">k", transform=ax.transAxes, clip_on=False) 83 | ax.plot(0, 1, "^k", transform=ax.transAxes, clip_on=False) 84 | plt.gca().invert_xaxis() 85 | plt.tight_layout() 86 | plt.savefig(f'{config.dir}/pareto_{config.sim_mode}_{"_".join(config.policies)}.pdf', format='pdf') 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 | -------------------------------------------------------------------------------- /abr-synthetic/slsim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.utils.tensorboard import SummaryWriter 5 | import os 6 | from tqdm import tqdm 7 | from sklearn.model_selection import train_test_split 8 | from nn_util import MLP 9 | 10 | loss_dics = { 11 | "mse_loss": nn.MSELoss(), 12 | "l1_loss": nn.L1Loss(), 13 | "huber_loss": nn.HuberLoss(), 14 | } 15 | 16 | 17 | def train_slsim( 18 | datapath, models_path="models", BATCH_SIZE=2**13, N=int(5 * 1e6), loss="mse_loss" 19 | ): 20 | path_models = f"{models_path}/slsim/" 21 | 22 | try: 23 | os.makedirs(path_models) 24 | except: 25 | pass 26 | log_path = f"{path_models}/logs" 27 | try: 28 | os.makedirs(log_path) 29 | except: 30 | pass 31 | 32 | if torch.cuda.is_available(): 33 | device = torch.device(f"cuda:3") 34 | else: 35 | device = torch.device(f"cpu") 36 | 37 | inputs_train = np.load(f"{datapath}/white_train_inputs_synthetic.npy") # 38 | # [:, (buffer, chosen_chunk_size, min_rtt, c_hat)] 39 | 40 | outputs_train = np.load(f"{datapath}/white_train_outputs_synthetic.npy")[:, :] 41 | # [:, (next_buffer, download_time, policy_label)] 42 | 43 | val_size = int(inputs_train.shape[0] * 0.15) 44 | train_idx, val_idx = train_test_split( 45 | np.arange(len(inputs_train)), test_size=val_size, train_size=None 46 | ) 47 | 48 | train_input_tensors = torch.as_tensor( 49 | inputs_train[train_idx], dtype=torch.float32, device=device 50 | ) 51 | train_output_tensors = torch.as_tensor( 52 | outputs_train[train_idx], dtype=torch.float32, device=device 53 | ) 54 | 55 | val_input_tensors = torch.as_tensor( 56 | inputs_train[val_idx], dtype=torch.float32, device=device 57 | ) 58 | val_output_tensors = torch.as_tensor( 59 | outputs_train[val_idx], dtype=torch.float32, device=device 60 | ) 61 | 62 | buffer_predictor = MLP( 63 | input_dim=4, output_dim=2, hidden_sizes=[128, 128], activation=nn.ReLU 64 | ).to(device) 65 | 66 | loss = loss_dics[loss] 67 | buffer_predictor_optimizer = torch.optim.Adam( 68 | buffer_predictor.parameters(), lr=1e-4 69 | ) 70 | writer_train = SummaryWriter(log_dir=f"{log_path}") 71 | best_loss = np.inf 72 | for epoch in 
tqdm(range(20000)): 73 | # Predictor training: 74 | idx = np.random.choice(np.arange(len(train_input_tensors)), size=BATCH_SIZE) 75 | batch_input_tensors = train_input_tensors[idx] 76 | batch_output_tensors = train_output_tensors[idx] 77 | buffer_predictor_optimizer.zero_grad() 78 | pred_tensors = buffer_predictor(batch_input_tensors[:, :]) 79 | pred_loss = loss(pred_tensors, batch_output_tensors[:, :2]) 80 | writer_train.add_scalar( 81 | "predictor_loss/prediction", pred_loss.cpu().detach().numpy(), epoch 82 | ) 83 | pred_loss.backward() 84 | buffer_predictor_optimizer.step() 85 | 86 | if epoch % 100 == 99: 87 | print( 88 | f"Train loss: epoch {epoch}, prediction loss {pred_loss.cpu().detach().numpy()}" 89 | ) 90 | ## val loss 91 | pred_tensors = buffer_predictor(val_input_tensors[:, :]) 92 | pred_loss = loss(pred_tensors, val_output_tensors[:, :2]) 93 | total_loss = pred_loss.cpu().detach().numpy() 94 | print( 95 | f"Val loss: epoch {epoch}, prediction loss {pred_loss.cpu().detach().numpy()}" 96 | ) 97 | if best_loss > total_loss: 98 | best_loss = total_loss 99 | print(f"saving ... best losses: {best_loss},") 100 | torch.save( 101 | buffer_predictor, f"{path_models}/best_buffer_" "predictor.pth" 102 | ) 103 | -------------------------------------------------------------------------------- /load-balancing/slsim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.utils.tensorboard import SummaryWriter 5 | import os 6 | from tqdm import tqdm 7 | from sklearn.model_selection import train_test_split 8 | from nn_util import MLP 9 | 10 | loss_dics = { 11 | "mse_loss": nn.MSELoss(), 12 | "l1_loss": nn.L1Loss(), 13 | "huber_loss": nn.HuberLoss(), 14 | } 15 | 16 | 17 | def train_slsim( 18 | datapath, models_path="models", BATCH_SIZE=2**13, N=int(5 * 1e6), loss="mse_loss" 19 | ): 20 | path_models = f"{models_path}/slsim/" 21 | 22 | try: 23 | os.makedirs(path_models) 24 | except: 25 | pass 26 | log_path = f"{path_models}/logs" 27 | try: 28 | os.makedirs(log_path) 29 | except: 30 | pass 31 | 32 | if torch.cuda.is_available(): 33 | device = torch.device(f"cuda:2") 34 | else: 35 | device = torch.device(f"cpu") 36 | 37 | # only 5 Millions points are used for training 38 | inputs_train = np.load(f"{datapath}/white_train_inputs.npy")[:N, :] # 39 | # [:, (processing_time, one hot encoder for action (16))] 40 | 41 | outputs_train = np.load(f"{datapath}/white_train_outputs.npy")[:N, :] 42 | # [:, (processing_time, policy_label)] 43 | 44 | val_size = int(inputs_train.shape[0] * 0.05) 45 | train_idx, val_idx = train_test_split( 46 | np.arange(len(inputs_train)), test_size=val_size, train_size=None 47 | ) 48 | 49 | train_input_tensors = torch.as_tensor( 50 | inputs_train[train_idx], dtype=torch.float32, device=device 51 | ) 52 | train_output_tensors = torch.as_tensor( 53 | outputs_train[train_idx], dtype=torch.float32, device=device 54 | ) 55 | 56 | val_input_tensors = torch.as_tensor( 57 | inputs_train[val_idx], dtype=torch.float32, device=device 58 | ) 59 | val_output_tensors = torch.as_tensor( 60 | outputs_train[val_idx], dtype=torch.float32, device=device 61 | ) 62 | 63 | buffer_predictor = MLP( 64 | input_dim=9, output_dim=1, hidden_sizes=[128, 128], activation=nn.ReLU 65 | ).to(device) 66 | 67 | loss = loss_dics[loss] 68 | buffer_predictor_optimizer = torch.optim.Adam( 69 | buffer_predictor.parameters(), lr=5e-5 70 | ) 71 | 72 | writer_train = SummaryWriter(log_dir=f"{log_path}") 73 | 74 | 
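# Training loop: sample a random minibatch each epoch, take one Adam step, and every 100 epochs evaluate on the held-out split, checkpointing whenever the validation loss improves.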
best_loss = np.inf 75 | for epoch in tqdm(range(10000)): 76 | # Predictor training: 77 | idx = np.random.choice(np.arange(len(train_input_tensors)), size=BATCH_SIZE) 78 | batch_input_tensors = train_input_tensors[idx] 79 | batch_output_tensors = train_output_tensors[idx] 80 | buffer_predictor_optimizer.zero_grad() 81 | 82 | predictor_input = batch_input_tensors[:, :] 83 | pred_tensors = buffer_predictor(predictor_input) 84 | pred_loss = loss(pred_tensors, batch_output_tensors[:, :1]) 85 | total_loss = pred_loss 86 | writer_train.add_scalar( 87 | "predictor_loss/prediction", pred_loss.cpu().detach().numpy(), epoch 88 | ) 89 | total_loss.backward() 90 | buffer_predictor_optimizer.step() 91 | 92 | if epoch % 100 == 99: 93 | print( 94 | f"Train loss: epoch {epoch}, prediction loss {pred_loss.cpu().detach().numpy()}" 95 | ) 96 | pred_tensors = buffer_predictor(val_input_tensors[:, :]) 97 | total_loss = loss(pred_tensors, val_output_tensors[:, :1]).cpu().detach().numpy()  # validation loss, used for checkpointing below 98 | print( 99 | f"Val loss: epoch {epoch}, prediction loss {total_loss}" 100 | ) 101 | if best_loss > total_loss: 102 | best_loss = total_loss 103 | print(f"saving ... best losses: {best_loss}") 104 | torch.save(buffer_predictor, f"{path_models}/best_buffer_predictor.pth") 105 | -------------------------------------------------------------------------------- /abr-puffer/analysis/sl_subset_stall.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import datetime 3 | import argparse 4 | import pickle 5 | from tqdm import tqdm 6 | import os 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--dir", help="source directory") 10 | parser.add_argument("--left_out_policy", type=str, help="left out policy") 11 | parser.add_argument("--model_number", type=int, help="saved model epoch number", default=10000) 12 | args = parser.parse_args() 13 | left_out_text = f'_{args.left_out_policy}' 14 | PERIOD_TEXT = f'2020-07-27to2021-06-01{left_out_text}' 15 | policy_names = ['bola_basic_v2', 'bola_basic_v1', 'puffer_ttp_cl', 'puffer_ttp_20190202', 'linear_bba'] 16 | buffer_based_names = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 17 | 18 | sl_data = {target_policy: {'rebuffs': [], 'lens': []} for target_policy in buffer_based_names} 19 | start_date = datetime.date(2020, 7, 27) 20 | end_date = datetime.date(2021, 6, 1) 21 | all_days = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)] 22 | all_days = [day for day in all_days if day not in [datetime.date(2019, 5, 12), datetime.date(2019, 5, 13), 23 | datetime.date(2019, 5, 15), datetime.date(2019, 5, 17), 24 | datetime.date(2019, 5, 18), datetime.date(2019, 5, 19), 25 | datetime.date(2019, 5, 25), datetime.date(2019, 5, 27), 26 | datetime.date(2019, 5, 30), datetime.date(2019, 6, 1), 27 | datetime.date(2019, 6, 2), datetime.date(2019, 6, 3), 28 | datetime.date(2019, 7, 2), datetime.date(2019, 7, 3), 29 | datetime.date(2019, 7, 4), datetime.date(2020, 7, 7), 30 | datetime.date(2020, 7, 8), datetime.date(2021, 6, 2), 31 | datetime.date(2021, 6, 2), datetime.date(2021, 6, 3), 32 | datetime.date(2022, 1, 31), datetime.date(2022, 2, 1), 33 | datetime.date(2022, 2, 2), datetime.date(2022, 2, 3), 34 | datetime.date(2022, 2, 4), datetime.date(2022, 2, 5), 35 | datetime.date(2022, 2, 6), datetime.date(2022, 2, 7)]] 36 | cooked_path = f'{args.dir}cooked' 37 | for today in tqdm(all_days): 38 | date_string = "%d-%02d-%02d" % (today.year, today.month, today.day) 39 | ids =
np.load(f'{cooked_path}/{date_string}_ids_translated.npy', allow_pickle=True) 40 | cf_path = f'{args.dir}{PERIOD_TEXT}_sl_cfs/cfs/model_{args.model_number}' 41 | bba_rebuffs = np.load(f'{cf_path}/{date_string}_linear_bba_rebuffs.npy', allow_pickle=True) 42 | bola1_rebuffs = np.load(f'{cf_path}/{date_string}_bola1_rebuffs.npy', allow_pickle=True) 43 | bola2_rebuffs = np.load(f'{cf_path}/{date_string}_bola2_rebuffs.npy', allow_pickle=True) 44 | for idx, policy_name in enumerate(ids): 45 | if policy_name != args.left_out_policy: 46 | sl_data['bola_basic_v1']['rebuffs'].append(np.sum(bola1_rebuffs[idx][1:])) 47 | sl_data['bola_basic_v2']['rebuffs'].append(np.sum(bola2_rebuffs[idx][1:])) 48 | sl_data['linear_bba']['rebuffs'].append(np.sum(bba_rebuffs[idx][1:])) 49 | sl_data['bola_basic_v1']['lens'].append(len(bola1_rebuffs[idx]) * 2.002 + sl_data['bola_basic_v1']['rebuffs'][-1]) 50 | sl_data['bola_basic_v2']['lens'].append(len(bola2_rebuffs[idx]) * 2.002 + sl_data['bola_basic_v2']['rebuffs'][-1]) 51 | sl_data['linear_bba']['lens'].append(len(bba_rebuffs[idx]) * 2.002 + sl_data['linear_bba']['rebuffs'][-1]) 52 | stall_path = f'{args.dir}subset_sl_stall_dicts/{args.left_out_policy}' 53 | os.makedirs(stall_path, exist_ok=True) 54 | 55 | with open(f'{stall_path}/stalls.pkl', 'wb') as f: 56 | pickle.dump(sl_data, f) 57 | -------------------------------------------------------------------------------- /abr-puffer/analysis/sl_subset_ssim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import datetime 4 | import argparse 5 | import pickle 6 | from tqdm import tqdm 7 | import os 8 | 9 | MIN_SSIM = 0 10 | MAX_SSIM = 60 11 | 12 | 13 | def ssim_db(ssim: np.ndarray) -> np.ndarray: 14 | return np.where(ssim == 1, MAX_SSIM, np.clip(-10 * np.log10(1 - ssim), a_min=MIN_SSIM, a_max=MAX_SSIM)) 15 | 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument("--dir", help="source directory") 19 | parser.add_argument("--left_out_policy", type=str, help="left out policy") 20 | parser.add_argument("--model_number", type=int, help="saved model epoch number", default=10000) 21 | args = parser.parse_args() 22 | NUMBER_OF_BINS = 10000 23 | left_out_text = f'_{args.left_out_policy}' 24 | PERIOD_TEXT = f'2020-07-27to2021-06-01{left_out_text}' 25 | policy_names = ['bola_basic_v2', 'bola_basic_v1', 'puffer_ttp_cl', 'puffer_ttp_20190202', 'linear_bba'] 26 | buffer_based_names = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 27 | 28 | start_date = datetime.date(2020, 7, 27) 29 | end_date = datetime.date(2021, 6, 1) 30 | all_days = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)] 31 | all_days = [day for day in all_days if day not in [datetime.date(2019, 5, 12), datetime.date(2019, 5, 13), 32 | datetime.date(2019, 5, 15), datetime.date(2019, 5, 17), 33 | datetime.date(2019, 5, 18), datetime.date(2019, 5, 19), 34 | datetime.date(2019, 5, 25), datetime.date(2019, 5, 27), 35 | datetime.date(2019, 5, 30), datetime.date(2019, 6, 1), 36 | datetime.date(2019, 6, 2), datetime.date(2019, 6, 3), 37 | datetime.date(2019, 7, 2), datetime.date(2019, 7, 3), 38 | datetime.date(2019, 7, 4), datetime.date(2020, 7, 7), 39 | datetime.date(2020, 7, 8), datetime.date(2021, 6, 2), 40 | datetime.date(2021, 6, 2), datetime.date(2021, 6, 3), 41 | datetime.date(2022, 1, 31), datetime.date(2022, 2, 1), 42 | datetime.date(2022, 2, 2), datetime.date(2022, 2, 3), 43 | datetime.date(2022, 2, 4), 
datetime.date(2022, 2, 5), 44 | datetime.date(2022, 2, 6), datetime.date(2022, 2, 7)]] 45 | cooked_path = f'{args.dir}cooked' 46 | plt.figure() 47 | sl_ssims = {target_policy: [] for target_policy in buffer_based_names} 48 | for today in tqdm(all_days): 49 | date_string = "%d-%02d-%02d" % (today.year, today.month, today.day) 50 | ids = np.load(f'{cooked_path}/{date_string}_ids_translated.npy', allow_pickle=True) 51 | sl_path = f'{args.dir}{PERIOD_TEXT}_sl_cfs/cfs/model_{args.model_number}' 52 | bba_ssims = np.load(f'{sl_path}/{date_string}_linear_bba_ssims.npy', allow_pickle=True) 53 | bola1_ssims = np.load(f'{sl_path}/{date_string}_bola1_ssims.npy', allow_pickle=True) 54 | bola2_ssims = np.load(f'{sl_path}/{date_string}_bola2_ssims.npy', allow_pickle=True) 55 | for idx, policy_name in enumerate(ids): 56 | if policy_name != args.left_out_policy: 57 | sl_ssims['bola_basic_v1'].append(bola1_ssims[idx]) 58 | sl_ssims['bola_basic_v2'].append(bola2_ssims[idx]) 59 | sl_ssims['linear_bba'].append(bba_ssims[idx]) 60 | for target in buffer_based_names: 61 | sl_ssims[target] = np.concatenate(sl_ssims[target]) 62 | sl_ssims[target] = np.mean(sl_ssims[target]) 63 | sl_ssims[target] = ssim_db(sl_ssims[target]) 64 | 65 | ssim_path = f'{args.dir}subset_sl_ssim_dicts/{args.left_out_policy}' 66 | os.makedirs(ssim_path, exist_ok=True) 67 | with open(f'{ssim_path}/ssims.pkl', 'wb') as f: 68 | pickle.dump(sl_ssims, f) 69 | -------------------------------------------------------------------------------- /abr-puffer/analysis/subset_ssim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import datetime 4 | import argparse 5 | import pickle 6 | from tqdm import tqdm 7 | import os 8 | 9 | MIN_SSIM = 0 10 | MAX_SSIM = 60 11 | 12 | 13 | def ssim_db(ssim: np.ndarray) -> np.ndarray: 14 | return np.where(ssim == 1, MAX_SSIM, np.clip(-10 * np.log10(1 - ssim), a_min=MIN_SSIM, a_max=MAX_SSIM)) 15 | 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument("--dir", help="source directory") 19 | parser.add_argument("--C", type=float, help="discriminator loss coefficient") 20 | parser.add_argument("--left_out_policy", type=str, help="left out policy") 21 | parser.add_argument("--model_number", type=int, help="saved model epoch number", default=5000) 22 | args = parser.parse_args() 23 | NUMBER_OF_BINS = 10000 24 | left_out_text = f'_{args.left_out_policy}' 25 | PERIOD_TEXT = f'2020-07-27to2021-06-01{left_out_text}' 26 | DISCRIMINATOR_EPOCH = 10 27 | C = args.C 28 | policy_names = ['bola_basic_v2', 'bola_basic_v1', 'puffer_ttp_cl', 'puffer_ttp_20190202', 'linear_bba'] 29 | buffer_based_names = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 30 | 31 | start_date = datetime.date(2020, 7, 27) 32 | end_date = datetime.date(2021, 6, 1) 33 | all_days = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)] 34 | all_days = [day for day in all_days if day not in [datetime.date(2019, 5, 12), datetime.date(2019, 5, 13), 35 | datetime.date(2019, 5, 15), datetime.date(2019, 5, 17), 36 | datetime.date(2019, 5, 18), datetime.date(2019, 5, 19), 37 | datetime.date(2019, 5, 25), datetime.date(2019, 5, 27), 38 | datetime.date(2019, 5, 30), datetime.date(2019, 6, 1), 39 | datetime.date(2019, 6, 2), datetime.date(2019, 6, 3), 40 | datetime.date(2019, 7, 2), datetime.date(2019, 7, 3), 41 | datetime.date(2019, 7, 4), datetime.date(2020, 7, 7), 42 | datetime.date(2020, 7, 8), datetime.date(2021, 6, 2), 43 
| datetime.date(2021, 6, 2), datetime.date(2021, 6, 3), 44 | datetime.date(2022, 1, 31), datetime.date(2022, 2, 1), 45 | datetime.date(2022, 2, 2), datetime.date(2022, 2, 3), 46 | datetime.date(2022, 2, 4), datetime.date(2022, 2, 5), 47 | datetime.date(2022, 2, 6), datetime.date(2022, 2, 7)]] 48 | cooked_path = f'{args.dir}cooked' 49 | plt.figure() 50 | sim_ssims = {target_policy: [] for target_policy in buffer_based_names} 51 | for today in tqdm(all_days): 52 | date_string = "%d-%02d-%02d" % (today.year, today.month, today.day) 53 | ids = np.load(f'{cooked_path}/{date_string}_ids_translated.npy', allow_pickle=True) 54 | sim_path = f'{args.dir}{PERIOD_TEXT}_buff_cfs/inner_loop_{DISCRIMINATOR_EPOCH}/C_{C}/cfs/' \ 55 | f'model_{args.model_number}' 56 | bba_ssims = np.load(f'{sim_path}/{date_string}_linear_bba_ssims.npy', allow_pickle=True) 57 | bola1_ssims = np.load(f'{sim_path}/{date_string}_bola1_ssims.npy', allow_pickle=True) 58 | bola2_ssims = np.load(f'{sim_path}/{date_string}_bola2_ssims.npy', allow_pickle=True) 59 | for idx, policy_name in enumerate(ids): 60 | if policy_name != args.left_out_policy: 61 | sim_ssims['bola_basic_v1'].append(bola1_ssims[idx]) 62 | sim_ssims['bola_basic_v2'].append(bola2_ssims[idx]) 63 | sim_ssims['linear_bba'].append(bba_ssims[idx]) 64 | for target in buffer_based_names: 65 | sim_ssims[target] = np.concatenate(sim_ssims[target]) 66 | sim_ssims[target] = np.mean(sim_ssims[target]) 67 | sim_ssims[target] = ssim_db(sim_ssims[target]) 68 | 69 | ssim_path = f'{args.dir}subset_ssim_dicts/{args.left_out_policy}' 70 | os.makedirs(ssim_path, exist_ok=True) 71 | with open(f'{ssim_path}/ssims_{C}.pkl', 'wb') as f: 72 | pickle.dump(sim_ssims, f) 73 | -------------------------------------------------------------------------------- /abr-puffer/training/sl_subset_train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.utils.tensorboard import SummaryWriter 5 | import os 6 | import argparse 7 | from tqdm import tqdm 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--dir", help="root directory") 11 | parser.add_argument("--left_out_policy", type=str, help="left out policy") 12 | parser.add_argument("--device", type=str, help="Compute device", default='cuda:0') 13 | parser.add_argument("--batch_size", type=int, default=17) 14 | args = parser.parse_args() 15 | BATCH_SIZE = 2 ** args.batch_size 16 | device = torch.device(args.device) 17 | left_out_text = f'_{args.left_out_policy}' 18 | PERIOD_TEXT = f'2020-07-27to2021-06-01{left_out_text}' 19 | 20 | torch.manual_seed(10) 21 | np.random.seed(10) 22 | 23 | 24 | def mlp(sizes, activation, output_activation=nn.Identity): 25 | layers = [] 26 | for j in range(len(sizes) - 1): 27 | act = activation if j < len(sizes) - 2 else output_activation 28 | layers += [nn.Linear(sizes[j], sizes[j + 1]), act()] 29 | return nn.Sequential(*layers) 30 | 31 | 32 | class MLP(nn.Module): 33 | 34 | def __init__(self, input_dim, output_dim, hidden_sizes, activation): 35 | super().__init__() 36 | self.predict = mlp(sizes=[input_dim] + list(hidden_sizes) + [output_dim], activation=activation, 37 | output_activation=nn.Identity) 38 | 39 | def forward(self, raw_input): 40 | prediction = self.predict(raw_input) 41 | return prediction 42 | 43 | 44 | new_path = f'{args.dir}{PERIOD_TEXT}_SL_trained_models' 45 | os.makedirs(new_path, exist_ok=True) 46 | data_dir = f'{args.dir}subset_data/{args.left_out_policy}' 47 | dts = 
np.load(f'{data_dir}/white_dts.npy') 48 | buffs = np.load(f'{data_dir}/white_buffs.npy') 49 | next_buffs = np.load(f'{data_dir}/white_next_buffs.npy') 50 | policy_numbers = np.load(f'{data_dir}/policy_numbers.npy') 51 | chats = np.load(f'{data_dir}/white_chats.npy') 52 | actions = np.load(f'{data_dir}/white_actions.npy') 53 | data_size = len(chats) 54 | dt_tensors = torch.as_tensor(dts, dtype=torch.float32, device=device) 55 | del dts 56 | buff_tensors = torch.as_tensor(buffs, dtype=torch.float32, device=device) 57 | next_buff_tensors = torch.as_tensor(next_buffs, dtype=torch.float32, device=device) 58 | del buffs, next_buffs 59 | chat_tensors = torch.as_tensor(chats, dtype=torch.float32, device=device) 60 | action_tensors = torch.as_tensor(actions, dtype=torch.float32, device=device) 61 | policy_number_tensors = torch.as_tensor(policy_numbers, dtype=torch.long, device=device) 62 | del chats, actions, policy_numbers 63 | 64 | predictor = MLP(input_dim=3, output_dim=2, hidden_sizes=[128, 128], activation=nn.ReLU).to(device=device) 65 | huber_loss = nn.HuberLoss(delta=0.2) 66 | predictor_optimizer = torch.optim.Adam(predictor.parameters()) 67 | writer_train = SummaryWriter(log_dir=f"{args.dir}logs/subset_{args.left_out_policy}_SL") 68 | for epoch in tqdm(range(10000)): 69 | # Predictor training: 70 | idx = np.random.choice(data_size, size=BATCH_SIZE) 71 | predictor_optimizer.zero_grad() 72 | pred_tensors = predictor(torch.cat((buff_tensors[idx].unsqueeze(1), 73 | action_tensors[idx].unsqueeze(1), chat_tensors[idx].unsqueeze(1)), dim=1)) 74 | dt_pred_tensors = pred_tensors[:, 1:2] 75 | dt_gt_tensors = dt_tensors[idx].unsqueeze(1) 76 | buff_pred_tensors = pred_tensors[:, 0:1] 77 | buff_gt_tensors = next_buff_tensors[idx].unsqueeze(1) 78 | assert buff_pred_tensors.shape == buff_gt_tensors.shape, f'{buff_pred_tensors.shape}, {buff_gt_tensors.shape}' 79 | assert dt_pred_tensors.shape == dt_gt_tensors.shape, f'{dt_pred_tensors.shape}, {dt_gt_tensors.shape}' 80 | buff_pred_loss = huber_loss(buff_pred_tensors, buff_gt_tensors) 81 | dt_pred_loss = huber_loss(dt_pred_tensors, dt_gt_tensors) 82 | pred_loss = (dt_pred_loss + buff_pred_loss) / 2 83 | writer_train.add_scalar("predictor_loss/dt_prediction", dt_pred_loss.cpu().detach().item(), epoch) 84 | writer_train.add_scalar("predictor_loss/buff_prediction", buff_pred_loss.cpu().detach().item(), epoch) 85 | pred_loss.backward() 86 | predictor_optimizer.step() 87 | if epoch % 100 == 99: 88 | torch.save(predictor, f'{new_path}/{epoch + 1}_predictor.pth') 89 | -------------------------------------------------------------------------------- /abr-puffer/analysis/subset_stall.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import datetime 3 | import argparse 4 | import pickle 5 | from tqdm import tqdm 6 | import os 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--dir", help="source directory") 10 | parser.add_argument("--C", type=float, help="discriminator loss coefficient") 11 | parser.add_argument("--left_out_policy", type=str, help="left out policy") 12 | parser.add_argument("--model_number", type=int, help="saved model epoch number", default=5000) 13 | args = parser.parse_args() 14 | NUMBER_OF_BINS = 10000 15 | left_out_text = f'_{args.left_out_policy}' 16 | PERIOD_TEXT = f'2020-07-27to2021-06-01{left_out_text}' 17 | DISCRIMINATOR_EPOCH = 10 18 | C = args.C 19 | policy_names = ['bola_basic_v2', 'bola_basic_v1', 'puffer_ttp_cl', 'puffer_ttp_20190202', 'linear_bba'] 20 | 
buffer_based_names = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 21 | 22 | sim_data = {target_policy: {'rebuffs': [], 'lens': []} for target_policy in buffer_based_names} 23 | start_date = datetime.date(2020, 7, 27) 24 | end_date = datetime.date(2021, 6, 1) 25 | all_days = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)] 26 | all_days = [day for day in all_days if day not in [datetime.date(2019, 5, 12), datetime.date(2019, 5, 13), 27 | datetime.date(2019, 5, 15), datetime.date(2019, 5, 17), 28 | datetime.date(2019, 5, 18), datetime.date(2019, 5, 19), 29 | datetime.date(2019, 5, 25), datetime.date(2019, 5, 27), 30 | datetime.date(2019, 5, 30), datetime.date(2019, 6, 1), 31 | datetime.date(2019, 6, 2), datetime.date(2019, 6, 3), 32 | datetime.date(2019, 7, 2), datetime.date(2019, 7, 3), 33 | datetime.date(2019, 7, 4), datetime.date(2020, 7, 7), 34 | datetime.date(2020, 7, 8), datetime.date(2021, 6, 2), 35 | datetime.date(2021, 6, 2), datetime.date(2021, 6, 3), 36 | datetime.date(2022, 1, 31), datetime.date(2022, 2, 1), 37 | datetime.date(2022, 2, 2), datetime.date(2022, 2, 3), 38 | datetime.date(2022, 2, 4), datetime.date(2022, 2, 5), 39 | datetime.date(2022, 2, 6), datetime.date(2022, 2, 7)]] 40 | cooked_path = f'{args.dir}cooked' 41 | for today in tqdm(all_days): 42 | date_string = "%d-%02d-%02d" % (today.year, today.month, today.day) 43 | ids = np.load(f'{cooked_path}/{date_string}_ids_translated.npy', allow_pickle=True) 44 | cf_path = f'{args.dir}{PERIOD_TEXT}_dt_cfs/inner_loop_{DISCRIMINATOR_EPOCH}/C_{C}/cfs/' \ 45 | f'model_{args.model_number}' 46 | bba_rebuffs = np.load(f'{cf_path}/{date_string}_linear_bba_rebuffs.npy', allow_pickle=True) 47 | bola1_rebuffs = np.load(f'{cf_path}/{date_string}_bola1_rebuffs.npy', allow_pickle=True) 48 | bola2_rebuffs = np.load(f'{cf_path}/{date_string}_bola2_rebuffs.npy', allow_pickle=True) 49 | for idx, policy_name in enumerate(ids): 50 | if policy_name != args.left_out_policy: 51 | sim_data['bola_basic_v1']['rebuffs'].append(np.sum(bola1_rebuffs[idx][1:])) 52 | sim_data['bola_basic_v2']['rebuffs'].append(np.sum(bola2_rebuffs[idx][1:])) 53 | sim_data['linear_bba']['rebuffs'].append(np.sum(bba_rebuffs[idx][1:])) 54 | sim_data['bola_basic_v1']['lens'].append(len(bola1_rebuffs[idx]) * 2.002 + sim_data['bola_basic_v1']['rebuffs'][-1]) 55 | sim_data['bola_basic_v2']['lens'].append(len(bola2_rebuffs[idx]) * 2.002 + sim_data['bola_basic_v2']['rebuffs'][-1]) 56 | sim_data['linear_bba']['lens'].append(len(bba_rebuffs[idx]) * 2.002 + sim_data['linear_bba']['rebuffs'][-1]) 57 | stall_path = f'{args.dir}subset_stall_dicts/{args.left_out_policy}' 58 | os.makedirs(stall_path, exist_ok=True) 59 | 60 | with open(f'{stall_path}/stalls_{C}.pkl', 'wb') as f: 61 | pickle.dump(sim_data, f) 62 | -------------------------------------------------------------------------------- /abr-puffer/visualization/fig4.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pickle 3 | import numpy as np 4 | import argparse 5 | import os 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--dir", help="root directory") 9 | args = parser.parse_args() 10 | 11 | plt.rcParams.update({ 12 | "text.usetex": True, 13 | 'legend.fontsize': 6, 14 | 'font.family': 'serif', 15 | 'font.serif': ['Times'] 16 | }) 17 | buffer_based_names = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 18 | color_dict = {'linear_bba': 'C2', 'bola_basic_v1': 'C1', 'bola_basic_v2': 
'C3'} 19 | marker_dict = {'expert': 'v', 'orig': 'o', 'sl': 's', 'causal': '*'} 20 | with open(f'{args.dir}tuned_hyperparams/buffer.pkl', 'rb') as f: 21 | f = pickle.load(f) 22 | bf_C = {policy: f[policy][0] for policy in buffer_based_names} 23 | with open(f'{args.dir}tuned_hyperparams/downloadtime.pkl', 'rb') as f: 24 | f = pickle.load(f) 25 | dt_C = {policy: f[policy][0] for policy in buffer_based_names} 26 | 27 | plt.figure(figsize=(3.25, 2.25)) 28 | #Original 29 | for policy in buffer_based_names: 30 | with open(f'{args.dir}subset_orig_ssim_dicts/{policy}/orig_ssims.pkl', 'rb') as f: 31 | orig_ssim = pickle.load(f) 32 | with open(f'{args.dir}subset_orig_rebuff_dicts/{policy}/orig_rebuffs.pkl', 'rb') as f: 33 | orig_stall = pickle.load(f) 34 | plt.scatter(100 * np.sum(orig_stall[policy]['rebuffs']) / np.sum(orig_stall[policy]['lens']), orig_ssim[policy], color=color_dict[policy], marker=marker_dict['orig'], label=f'orig_{policy}', s=25, zorder=500) 35 | print(f'orig_{policy}', 100 * np.sum(orig_stall[policy]['rebuffs']) / np.sum(orig_stall[policy]['lens']), orig_ssim[policy]) 36 | #Expert 37 | for policy in buffer_based_names: 38 | with open(f'{args.dir}subset_expert_rebuff_dicts/{policy}/expert_rebuffs.pkl', 'rb') as f: 39 | expert_stall = pickle.load(f) 40 | with open(f'{args.dir}subset_expert_ssim_dicts/{policy}/expert_ssims.pkl', 'rb') as f: 41 | expert_ssim = pickle.load(f) 42 | plt.scatter(100 * np.sum(expert_stall[policy]['rebuffs']) / np.sum(expert_stall[policy]['lens']), expert_ssim[policy], color=color_dict[policy], marker=marker_dict['expert'], label=f'expert_{policy}', s=25, zorder=500) 43 | print(f'expert_{policy}', 100 * np.sum(expert_stall[policy]['rebuffs']) / np.sum(expert_stall[policy]['lens']), expert_ssim[policy]) 44 | #SL 45 | for policy in buffer_based_names: 46 | with open(f'{args.dir}subset_sl_stall_dicts/{policy}/stalls.pkl', 'rb') as f: 47 | sl_stall = pickle.load(f) 48 | with open(f'{args.dir}subset_sl_ssim_dicts/{policy}/ssims.pkl', 'rb') as f: 49 | sl_ssim = pickle.load(f) 50 | plt.scatter(100 * np.sum(sl_stall[policy]['rebuffs']) / np.sum(sl_stall[policy]['lens']), sl_ssim[policy], color=color_dict[policy], marker=marker_dict['sl'], label=f'sl_{policy}', s=25, zorder=500) 51 | print(f'sl_{policy}', 100 * np.sum(sl_stall[policy]['rebuffs']) / np.sum(sl_stall[policy]['lens']), sl_ssim[policy]) 52 | #Causal 53 | for policy in buffer_based_names: 54 | with open(f'{args.dir}subset_stall_dicts/{policy}/stalls_{dt_C[policy]}.pkl', 'rb') as f: 55 | causal_stall = pickle.load(f) 56 | with open(f'{args.dir}subset_ssim_dicts/{policy}/ssims_{bf_C[policy]}.pkl', 'rb') as f: 57 | causal_ssim = pickle.load(f) 58 | plt.scatter(100 * np.sum(causal_stall[policy]['rebuffs']) / np.sum(causal_stall[policy]['lens']), causal_ssim[policy], color=color_dict[policy], marker=marker_dict['causal'], label=f'causal_{policy}', s=35, zorder=500) 59 | print(f'causal_{policy}', 100 * np.sum(causal_stall[policy]['rebuffs']) / np.sum(causal_stall[policy]['lens']), causal_ssim[policy]) 60 | from matplotlib.lines import Line2D 61 | handles = [Line2D([0], [0], color='w', markerfacecolor='w', markeredgecolor='k', marker='o'), 62 | Line2D([0], [0], color='w', markerfacecolor='w', markeredgecolor='k', marker='*', ms=8), 63 | Line2D([0], [0], color='w', markerfacecolor='w', markeredgecolor='k', marker='v'), 64 | Line2D([0], [0], color='w', markerfacecolor='w', markeredgecolor='k', marker='s'),] 65 | plt.legend(handles, ['Ground Truth', 'CausalSim', 'ExpertSim', 'SLSim'], ncol=4, loc='lower left', 
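# with mode="expand", the legend stretches horizontally to fill the
# bbox_to_anchor box placed just above the axes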
bbox_to_anchor=(-0.066, 1.1, 1.1, 0.25), borderaxespad=0, borderpad=0.5, mode="expand", handletextpad=0.2) 66 | plt.xlabel(r'Time Spent Stalled (\%)', fontsize=8) 67 | plt.ylabel(r'Average SSIM (dB)', fontsize=8) 68 | plt.yticks(fontsize=8) 69 | plt.xticks(fontsize=8) 70 | plt.tight_layout() 71 | plt.xlim([0.5, 9.1]) 72 | plt.ylim([14.81, 15.69]) 73 | plt.gca().invert_xaxis() 74 | plt.grid(zorder=550) 75 | 76 | fig_path = f'{args.dir}plots' 77 | os.makedirs(fig_path, exist_ok=True) 78 | plt.savefig(f'{fig_path}/fig4.pdf', format='pdf') -------------------------------------------------------------------------------- /abr-puffer/bayes_opt/env/bola.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | from .abr import ABRSimEnv, MIN_SSIM, MAX_SSIM, db_ssim, ssim_db 5 | 6 | 7 | class BolaAgent(object): 8 | size_ladder_bytes = [44319, 93355, 115601, 142904, 196884, 263965, 353752, 494902, 632193, 889893] 9 | ssim_index_ladder = [0.91050748, 0.94062527, 0.94806355, 0.95498943, 0.96214503, 0.96717277, 0.97273958, 0.97689813, 10 | 0.98004106, 0.98332605] 11 | MIN_BUF_S = 3 12 | MAX_BUF_S = ABRSimEnv.MAX_BUFFER_S 13 | 14 | def __init__(self, version: int, act_len: int, reservoir: float = None, cushion: float = None): 15 | super(BolaAgent, self).__init__() 16 | assert self.size_ladder_bytes[0] < self.size_ladder_bytes[1] 17 | assert self.ssim_index_ladder[0] < self.ssim_index_ladder[1] 18 | 19 | self.ssim_index_ladder = (ssim_db(np.array(self.ssim_index_ladder)) - MIN_SSIM) / MAX_SSIM 20 | 21 | assert self.MIN_BUF_S < self.MAX_BUF_S 22 | assert version in [1, 2] 23 | self.version = version 24 | self.act_n = act_len 25 | 26 | smallest = {'size': self.size_ladder_bytes[0], 27 | 'utility': self.utility(self.ssim_index_ladder[0])} 28 | second_smallest = {'size': self.size_ladder_bytes[1], 29 | 'utility': self.utility(self.ssim_index_ladder[1])} 30 | second_largest = {'size': self.size_ladder_bytes[-2], 31 | 'utility': self.utility(self.ssim_index_ladder[-2])} 32 | largest = {'size': self.size_ladder_bytes[-1], 33 | 'utility': self.utility(self.ssim_index_ladder[-1])} 34 | 35 | size_delta = self.size_ladder_bytes[1] - self.size_ladder_bytes[0] 36 | if version == 1: 37 | utility_high = largest['utility'] 38 | else: 39 | utility_high = self.utility(1) 40 | 41 | size_utility_term = second_smallest['size'] * smallest['utility'] - \ 42 | smallest['size'] * second_smallest['utility'] 43 | 44 | gp_nominator = self.MAX_BUF_S * size_utility_term - utility_high * self.MIN_BUF_S * size_delta 45 | gp_denominator = ((self.MIN_BUF_S - self.MAX_BUF_S) * size_delta) 46 | if reservoir is not None or cushion is not None: 47 | assert reservoir is not None and cushion is not None 48 | int_first_pair = -size_utility_term/size_delta 49 | size_delta_last = self.size_ladder_bytes[-1] - self.size_ladder_bytes[-2] 50 | size_utility_term_last = second_largest['size'] * largest['utility'] - \ 51 | largest['size'] * second_largest['utility'] 52 | int_last_pair = size_utility_term_last / size_delta_last 53 | self.Vp = cushion / (int_first_pair - int_last_pair) 54 | self.gp = reservoir / self.Vp + int_first_pair 55 | else: 56 | self.gp = gp_nominator / gp_denominator 57 | self.Vp = self.MAX_BUF_S / (utility_high + self.gp) 58 | 59 | def utility(self, ssim_index: float or np.ndarray) -> float or np.ndarray: 60 | unnorm_db = ssim_index * MAX_SSIM + MIN_SSIM 61 | if self.version == 1: 62 | return unnorm_db 63 | else: 64 | return db_ssim(unnorm_db) 
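# Note: `objective` below implements the BOLA scoring rule,
#   (Vp * (utility + gp) - buffer) / size,
# and `choose_max_objective` picks the chunk that maximizes it; Vp and gp are
# the control parameters derived in __init__ above.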
65 | 66 | def objective(self, utility: float or np.ndarray, size: float or np.ndarray, buffer: float) -> float or np.ndarray: 67 | return (self.Vp * (utility + self.gp) - buffer) / size 68 | 69 | def choose_max_objective(self, format_sizes: np.ndarray, format_ssims: np.ndarray, 70 | buffer: float) -> Tuple[int, float]: 71 | objs = self.objective(self.utility(format_ssims), format_sizes, buffer) 72 | chosen_index = np.argmax(objs).item() 73 | return chosen_index, objs[chosen_index] 74 | 75 | def choose_max_scaled_utility(self, format_ssims: np.ndarray) -> int: 76 | chosen_index = np.argmax(self.utility(format_ssims)).item() 77 | return chosen_index 78 | 79 | def sample_action(self, obs: np.ndarray) -> int: 80 | buffer = (obs[0] + 1) / 2 * self.MAX_BUF_S 81 | valid_mask = np.logical_not(np.logical_or(np.isnan(obs[-2 * self.act_n: -self.act_n]), 82 | np.isnan(obs[-self.act_n:]))) 83 | size_arr_valid = obs[-2 * self.act_n: -self.act_n][valid_mask] 84 | ssim_arr_valid = obs[-self.act_n:][valid_mask] 85 | index_arr_valid = np.arange(self.act_n)[valid_mask] 86 | 87 | max_obj_index, max_obj = self.choose_max_objective(size_arr_valid, ssim_arr_valid, buffer) 88 | 89 | if self.version == 1 or max_obj >= 0: 90 | return index_arr_valid[max_obj_index] 91 | else: 92 | max_util_index = self.choose_max_scaled_utility(ssim_arr_valid) 93 | return index_arr_valid[max_util_index] 94 | -------------------------------------------------------------------------------- /abr-puffer/analysis/original_subset_ssim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import datetime 4 | import argparse 5 | import pickle 6 | from tqdm import tqdm 7 | import os 8 | 9 | MIN_SSIM = 0 10 | MAX_SSIM = 60 11 | 12 | 13 | def ssim_db(ssim: np.ndarray) -> np.ndarray: 14 | return np.where(ssim == 1, MAX_SSIM, np.clip(-10 * np.log10(1 - ssim), a_min=MIN_SSIM, a_max=MAX_SSIM)) 15 | 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument("--dir", help="source directory") 19 | parser.add_argument("--left_out_policy", type=str, help="left out policy") 20 | args = parser.parse_args() 21 | NUMBER_OF_BINS = 10000 22 | left_out_text = f'_{args.left_out_policy}' 23 | PERIOD_TEXT = f'2020-07-27to2021-06-01{left_out_text}' 24 | policy_names = ['bola_basic_v2', 'bola_basic_v1', 'puffer_ttp_cl', 'puffer_ttp_20190202', 'linear_bba'] 25 | buffer_based_names = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 26 | start_date = datetime.date(2020, 7, 27) 27 | end_date = datetime.date(2021, 6, 1) 28 | all_days = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)] 29 | all_days = [day for day in all_days if day not in [datetime.date(2019, 5, 12), datetime.date(2019, 5, 13), 30 | datetime.date(2019, 5, 15), datetime.date(2019, 5, 17), 31 | datetime.date(2019, 5, 18), datetime.date(2019, 5, 19), 32 | datetime.date(2019, 5, 25), datetime.date(2019, 5, 27), 33 | datetime.date(2019, 5, 30), datetime.date(2019, 6, 1), 34 | datetime.date(2019, 6, 2), datetime.date(2019, 6, 3), 35 | datetime.date(2019, 7, 2), datetime.date(2019, 7, 3), 36 | datetime.date(2019, 7, 4), datetime.date(2020, 7, 7), 37 | datetime.date(2020, 7, 8), datetime.date(2021, 6, 2), 38 | datetime.date(2021, 6, 2), datetime.date(2021, 6, 3), 39 | datetime.date(2022, 1, 31), datetime.date(2022, 2, 1), 40 | datetime.date(2022, 2, 2), datetime.date(2022, 2, 3), 41 | datetime.date(2022, 2, 4), datetime.date(2022, 2, 5), 42 | datetime.date(2022, 2, 
6), datetime.date(2022, 2, 7)]] 43 | plt.figure() 44 | orig_ssims = {target_policy: [] for target_policy in buffer_based_names} 45 | expert_ssims = {target_policy: [] for target_policy in buffer_based_names} 46 | cooked_path = f'{args.dir}cooked' 47 | expert_path = f'{args.dir}2020-07-27to2021-06-01_expert_predictions' 48 | for today in tqdm(all_days): 49 | date_string = "%d-%02d-%02d" % (today.year, today.month, today.day) 50 | ids = np.load(f'{cooked_path}/{date_string}_ids_translated.npy', allow_pickle=True) 51 | orig_trajs = np.load(f'{cooked_path}/{date_string}_trajs.npy', allow_pickle=True) 52 | expert_bba_ssims = np.load(f'{expert_path}/{date_string}_linear_bba_ssims.npy', allow_pickle=True) 53 | expert_bola1_ssims = np.load(f'{expert_path}/{date_string}_bola1_ssims.npy', allow_pickle=True) 54 | expert_bola2_ssims = np.load(f'{expert_path}/{date_string}_bola2_ssims.npy', allow_pickle=True) 55 | for idx, policy_name in enumerate(ids): 56 | if policy_name in buffer_based_names: 57 | orig_ssims[policy_name].append(orig_trajs[idx][:-1, 8]) 58 | for idx, policy_name in enumerate(ids): 59 | if policy_name != args.left_out_policy: 60 | expert_ssims['bola_basic_v1'].append(expert_bola1_ssims[idx]) 61 | expert_ssims['bola_basic_v2'].append(expert_bola2_ssims[idx]) 62 | expert_ssims['linear_bba'].append(expert_bba_ssims[idx]) 63 | for target in buffer_based_names: 64 | orig_ssims[target] = np.concatenate(orig_ssims[target]) 65 | orig_ssims[target] = np.mean(orig_ssims[target]) 66 | orig_ssims[target] = ssim_db(orig_ssims[target]) 67 | expert_ssims[target] = np.concatenate(expert_ssims[target]) 68 | expert_ssims[target] = np.mean(expert_ssims[target]) 69 | expert_ssims[target] = ssim_db(expert_ssims[target]) 70 | 71 | orig_ssim_path = f'{args.dir}subset_orig_ssim_dicts/{args.left_out_policy}' 72 | expert_ssim_path = f'{args.dir}subset_expert_ssim_dicts/{args.left_out_policy}' 73 | 74 | os.makedirs(orig_ssim_path, exist_ok=True) 75 | os.makedirs(expert_ssim_path, exist_ok=True) 76 | 77 | with open(f'{orig_ssim_path}/orig_ssims.pkl', 'wb') as f: 78 | pickle.dump(orig_ssims, f) 79 | with open(f'{expert_ssim_path}/expert_ssims.pkl', 'wb') as f: 80 | pickle.dump(expert_ssims, f) -------------------------------------------------------------------------------- /abr-synthetic/generate_cfs.py: -------------------------------------------------------------------------------- 1 | from generate_traces import collect_traces_causalsim, collect_traces_slsim 2 | import numpy as np 3 | from policies import get_all_policies 4 | import torch 5 | from tqdm import tqdm 6 | 7 | 8 | def generate_cfs( 9 | datapath, 10 | training_datapath, 11 | models_path, 12 | test_policy_idx, 13 | config, 14 | alg="causalsim", 15 | r=None, 16 | ): 17 | buffers = np.load(f"{training_datapath}/raw_train_buffers_synthetic.npy") 18 | next_buffers = np.load(f"{training_datapath}/raw_train_next_buffers_synthetic.npy") 19 | c_hats = np.load(f"{training_datapath}/raw_train_c_hats_synthetic.npy") 20 | chosen_chunk_sizes = np.load( 21 | f"{training_datapath}/raw_train_chosen_chunk_sizes_synthetic.npy" 22 | ) 23 | min_rtts = np.load(f"{training_datapath}/raw_train_min_rtts_synthetic.npy") 24 | download_time = np.load( 25 | f"{training_datapath}/raw_train_download_time_synthetic.npy" 26 | ) 27 | 28 | buffer_mean = np.mean(buffers) 29 | next_buffer_mean = np.mean(next_buffers) 30 | c_hat_mean = 0 31 | chosen_chunk_size_mean = np.mean(chosen_chunk_sizes) 32 | min_rtts_mean = np.mean(min_rtts) 33 | download_time_mean = np.mean(download_time) 34 | 
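# The means above and the standard deviations below are normalization
# (whitening) statistics handed to the counterfactual collectors; the
# convention elsewhere in this repo is z-scoring, x -> (x - mean) / std.
# Note that c_hat_mean is pinned to 0 rather than the empirical mean of c_hats.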
35 | buffer_std = np.std(buffers) 36 | next_buffer_std = np.std(next_buffers) 37 | c_hat_std = np.std(c_hats) 38 | chosen_chunk_size_std = np.std(chosen_chunk_sizes) 39 | min_rtts_std = np.std(min_rtts) 40 | download_time_std = np.std(download_time) 41 | 42 | policies, _, _ = get_all_policies(config) 43 | if test_policy_idx is None: 44 | pols = policies 45 | else: 46 | pols = [policies[test_policy_idx]] 47 | 48 | train_trajectories = np.load( 49 | f"{training_datapath}/train_trajectories.npy", allow_pickle=True 50 | ) 51 | features = np.zeros( 52 | [ 53 | train_trajectories.shape[0], 54 | train_trajectories.shape[1], 55 | train_trajectories.shape[2] - 1, 56 | ] 57 | ) 58 | cfs = np.zeros( 59 | [ 60 | len(pols), 61 | train_trajectories.shape[1], 62 | train_trajectories.shape[2], 63 | 3, 64 | ] 65 | ) 66 | rtts = np.load(f"{datapath}/rtts.npy") 67 | trajs = train_trajectories[0, :] 68 | if alg == "causalsim": 69 | feature_extractor = torch.load( 70 | "%sbest_feature_extractor.pth" % models_path, 71 | map_location=torch.device("cpu"), 72 | ) 73 | buffer_predictor = torch.load( 74 | "%sbest_buffer_predictor.pth" % models_path, 75 | map_location=torch.device("cpu"), 76 | ) 77 | action_factor_net = torch.load( 78 | "%sbest_action_factor.pth" % models_path, map_location=torch.device("cpu") 79 | ) 80 | 81 | for idx, traj in tqdm(enumerate(trajs)): 82 | cfs[0, idx, :], features[0, idx, :] = collect_traces_causalsim( 83 | pols, 84 | traj[1:, 4], 85 | traj[:-1, -2].astype(int), 86 | traj[:, 13 : 13 + 30], 87 | rtts[idx], 88 | feature_extractor, 89 | buffer_predictor, 90 | action_factor_net, 91 | r, 92 | buffer_mean, 93 | buffer_std, 94 | next_buffer_mean, 95 | next_buffer_std, 96 | c_hat_mean, 97 | c_hat_std, 98 | chosen_chunk_size_mean, 99 | chosen_chunk_size_std, 100 | min_rtts_mean, 101 | min_rtts_std, 102 | download_time_mean, 103 | download_time_std, 104 | ) 105 | 106 | return cfs, features 107 | elif alg == "slsim": 108 | buffer_predictor = torch.load( 109 | "%sbest_buffer_predictor.pth" % models_path, 110 | map_location=torch.device("cpu"), 111 | ) 112 | 113 | for idx, traj in tqdm(enumerate(trajs)): 114 | cfs[:, idx, :] = collect_traces_slsim( 115 | pols, 116 | traj[1:, 4], 117 | traj[:, 13 : 13 + 30], 118 | rtts[idx], 119 | buffer_predictor, 120 | buffer_mean, 121 | buffer_std, 122 | next_buffer_mean, 123 | next_buffer_std, 124 | c_hat_mean, 125 | c_hat_std, 126 | chosen_chunk_size_mean, 127 | chosen_chunk_size_std, 128 | min_rtts_mean, 129 | min_rtts_std, 130 | download_time_mean, 131 | download_time_std, 132 | ) 133 | return cfs 134 | -------------------------------------------------------------------------------- /abr-puffer/analysis/original_subset_stall.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import datetime 5 | import argparse 6 | import pickle 7 | from tqdm import tqdm 8 | import os 9 | 10 | 11 | def get_stall(traj: np.ndarray or List[int]) -> float: 12 | if len(traj) > 1: 13 | assert traj[1] > 0, traj 14 | return traj[-1] - traj[1] 15 | else: 16 | assert traj[-1] == 0, traj 17 | return 0 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--dir", help="source directory") 21 | parser.add_argument("--left_out_policy", type=str, help="left out policy") 22 | args = parser.parse_args() 23 | NUMBER_OF_BINS = 10000 24 | left_out_text = f'_{args.left_out_policy}' 25 | PERIOD_TEXT = f'2020-07-27to2021-06-01{left_out_text}' 26 | policy_names 
= ['bola_basic_v2', 'bola_basic_v1', 'puffer_ttp_cl', 'puffer_ttp_20190202', 'linear_bba'] 27 | buffer_based_names = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 28 | start_date = datetime.date(2020, 7, 27) 29 | end_date = datetime.date(2021, 6, 1) 30 | all_days = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)] 31 | all_days = [day for day in all_days if day not in [datetime.date(2019, 5, 12), datetime.date(2019, 5, 13), 32 | datetime.date(2019, 5, 15), datetime.date(2019, 5, 17), 33 | datetime.date(2019, 5, 18), datetime.date(2019, 5, 19), 34 | datetime.date(2019, 5, 25), datetime.date(2019, 5, 27), 35 | datetime.date(2019, 5, 30), datetime.date(2019, 6, 1), 36 | datetime.date(2019, 6, 2), datetime.date(2019, 6, 3), 37 | datetime.date(2019, 7, 2), datetime.date(2019, 7, 3), 38 | datetime.date(2019, 7, 4), datetime.date(2020, 7, 7), 39 | datetime.date(2020, 7, 8), datetime.date(2021, 6, 2), 40 | datetime.date(2021, 6, 2), datetime.date(2021, 6, 3), 41 | datetime.date(2022, 1, 31), datetime.date(2022, 2, 1), 42 | datetime.date(2022, 2, 2), datetime.date(2022, 2, 3), 43 | datetime.date(2022, 2, 4), datetime.date(2022, 2, 5), 44 | datetime.date(2022, 2, 6), datetime.date(2022, 2, 7)]] 45 | plt.figure() 46 | orig_data = {target_policy: {'lens': [], 'rebuffs': []} for target_policy in buffer_based_names} 47 | expert_data = {target_policy: {'lens': [], 'rebuffs': []} for target_policy in buffer_based_names} 48 | cooked_path = f'{args.dir}cooked' 49 | expert_path = f'{args.dir}2020-07-27to2021-06-01_expert_predictions' 50 | for today in tqdm(all_days): 51 | date_string = "%d-%02d-%02d" % (today.year, today.month, today.day) 52 | ids = np.load(f'{cooked_path}/{date_string}_ids_translated.npy', allow_pickle=True) 53 | orig_trajs = np.load(f'{cooked_path}/{date_string}_trajs.npy', allow_pickle=True) 54 | expert_bba_rebuffs = np.load(f'{expert_path}/{date_string}_linear_bba_rebuffs.npy', allow_pickle=True) 55 | expert_bola1_rebuffs = np.load(f'{expert_path}/{date_string}_bola1_rebuffs.npy', allow_pickle=True) 56 | expert_bola2_rebuffs = np.load(f'{expert_path}/{date_string}_bola2_rebuffs.npy', allow_pickle=True) 57 | for idx, policy_name in enumerate(ids): 58 | if policy_name in buffer_based_names: 59 | orig_data[policy_name]['rebuffs'].append( 60 | np.sum(np.maximum(orig_trajs[idx][1:-1, 6] - orig_trajs[idx][1:-1, 0], 0))) 61 | orig_data[policy_name]['lens'].append((orig_trajs[idx].shape[0] - 1) * 2.002 + orig_data[policy_name]['rebuffs'][-1]) 62 | if policy_name != args.left_out_policy: 63 | expert_data['bola_basic_v1']['rebuffs'].append(get_stall(expert_bola1_rebuffs[idx])) 64 | expert_data['bola_basic_v2']['rebuffs'].append(get_stall(expert_bola2_rebuffs[idx])) 65 | expert_data['linear_bba']['rebuffs'].append(get_stall(expert_bba_rebuffs[idx])) 66 | expert_data['bola_basic_v1']['lens'].append((len(expert_bola1_rebuffs[idx]) - 1) * 2.002 + expert_data['bola_basic_v1']['rebuffs'][-1]) 67 | expert_data['bola_basic_v2']['lens'].append((len(expert_bola2_rebuffs[idx]) - 1) * 2.002 + expert_data['bola_basic_v2']['rebuffs'][-1]) 68 | expert_data['linear_bba']['lens'].append((len(expert_bba_rebuffs[idx]) - 1) * 2.002 + expert_data['linear_bba']['rebuffs'][-1]) 69 | 70 | 71 | orig_stall_path = f'{args.dir}subset_orig_rebuff_dicts/{args.left_out_policy}' 72 | expert_stall_path = f'{args.dir}subset_expert_rebuff_dicts/{args.left_out_policy}' 73 | 74 | os.makedirs(orig_stall_path, exist_ok=True) 75 | os.makedirs(expert_stall_path, exist_ok=True) 76 | 77 | with 
open(f'{orig_stall_path}/orig_rebuffs.pkl', 'wb') as f: 78 | pickle.dump(orig_data, f) 79 | with open(f'{expert_stall_path}/expert_rebuffs.pkl', 'wb') as f: 80 | pickle.dump(expert_data, f) -------------------------------------------------------------------------------- /abr-puffer/data_preparation/generate_subset_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import datetime 3 | from tqdm import tqdm 4 | import os 5 | import argparse 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--dir", help="root directory") 9 | args = parser.parse_args() 10 | 11 | policy_names = ['bola_basic_v2', 'bola_basic_v1', 'puffer_ttp_cl', 'puffer_ttp_20190202', 'linear_bba'] 12 | target_policy_names = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 13 | 14 | 15 | def convert_id_to_number(id, current_policies): 16 | return current_policies.index(id) 17 | 18 | 19 | def whiten(raw_data): 20 | mean = np.mean(raw_data) 21 | std = np.std(raw_data) 22 | white_data = (raw_data - mean) / std 23 | return white_data, mean, std 24 | 25 | 26 | def save(dir, white_data, mean, std, name): 27 | np.save(f'{dir}/white_{name}s.npy', white_data) 28 | np.save(f'{dir}/{name}s_mean.npy', mean) 29 | np.save(f'{dir}/{name}s_std.npy', std) 30 | 31 | 32 | start_date = datetime.date(2020, 7, 27) 33 | end_date = datetime.date(2021, 6, 1) 34 | all_days = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)] 35 | all_days = [day for day in all_days if day not in [datetime.date(2019, 5, 12), datetime.date(2019, 5, 13), 36 | datetime.date(2019, 5, 15), datetime.date(2019, 5, 17), 37 | datetime.date(2019, 5, 18), datetime.date(2019, 5, 19), 38 | datetime.date(2019, 5, 25), datetime.date(2019, 5, 27), 39 | datetime.date(2019, 5, 30), datetime.date(2019, 6, 1), 40 | datetime.date(2019, 6, 2), datetime.date(2019, 6, 3), 41 | datetime.date(2019, 7, 2), datetime.date(2019, 7, 3), 42 | datetime.date(2019, 7, 4), datetime.date(2020, 7, 7), 43 | datetime.date(2020, 7, 8), datetime.date(2021, 6, 2), 44 | datetime.date(2021, 6, 2), datetime.date(2021, 6, 3), 45 | datetime.date(2022, 1, 31), datetime.date(2022, 2, 1), 46 | datetime.date(2022, 2, 2), datetime.date(2022, 2, 3), 47 | datetime.date(2022, 2, 4), datetime.date(2022, 2, 5), 48 | datetime.date(2022, 2, 6), datetime.date(2022, 2, 7)]] 49 | load_dir = f'{args.dir}cooked' 50 | for left_out_policy in target_policy_names: 51 | buffers = [] 52 | next_buffers = [] 53 | chosen_chunk_sizes = [] 54 | c_hats = [] 55 | numbers = [] 56 | download_times = [] 57 | current_policies = policy_names.copy() 58 | current_policies.remove(left_out_policy) 59 | print('left_out: ', left_out_policy, 'training_data: ', current_policies) 60 | for today in tqdm(all_days): 61 | date_string = "%d-%02d-%02d" % (today.year, today.month, today.day) 62 | trajs = np.load(f"{load_dir}/{date_string}_trajs.npy", allow_pickle=True) 63 | ids = np.load(f"{load_dir}/{date_string}_ids_translated.npy", allow_pickle=True) 64 | for idx, traj in enumerate(trajs): 65 | policy_name = ids[idx] 66 | if policy_name != left_out_policy: 67 | number = convert_id_to_number(policy_name, current_policies) 68 | c_hats.extend(np.divide(traj[:-1, 7], traj[:-1, 6])) 69 | chosen_chunk_sizes.extend(traj[:-1, 7]) 70 | buffers.extend(traj[:-1, 0]) 71 | next_buffers.extend(traj[1:, 0]) 72 | download_times.extend(traj[:-1, 6]) 73 | numbers.extend([number for _ in traj[:-1, 0]]) 74 | buffers = np.array(buffers) 75 | next_buffers = 
np.array(next_buffers) 76 | chosen_chunk_sizes = np.array(chosen_chunk_sizes) 77 | numbers = np.array(numbers) 78 | c_hats = np.array(c_hats) 79 | download_times = np.array(download_times) 80 | save_dir = f'{args.dir}subset_data/{left_out_policy}' 81 | os.makedirs(save_dir, exist_ok=True) 82 | 83 | white_buffs, buffs_mean, buffs_std = whiten(buffers) 84 | white_next_buffs, next_buffs_mean, next_buffs_std = whiten(next_buffers) 85 | white_chats, chats_mean, chats_std = whiten(c_hats) 86 | white_actions, actions_mean, actions_std = whiten(chosen_chunk_sizes) 87 | white_dts, dts_mean, dts_std = whiten(download_times) 88 | 89 | assert len(white_buffs) == len(white_next_buffs) == len(white_chats) == len(white_actions) == len(white_dts) 90 | 91 | save(dir=save_dir, white_data=white_buffs, mean=buffs_mean, std=buffs_std, name='buff') 92 | save(dir=save_dir, white_data=white_next_buffs, mean=next_buffs_mean, std=next_buffs_std, name='next_buff') 93 | save(dir=save_dir, white_data=white_chats, mean=chats_mean, std=chats_std, name='chat') 94 | save(dir=save_dir, white_data=white_actions, mean=actions_mean, std=actions_std, name='action') 95 | save(dir=save_dir, white_data=white_dts, mean=dts_mean, std=dts_std, name='dt') 96 | 97 | np.save(f'{save_dir}/policy_numbers.npy', np.array(numbers)) 98 | -------------------------------------------------------------------------------- /abr-puffer/inference/extract_subset_latents.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import datetime 4 | import torch.nn as nn 5 | import os 6 | import argparse 7 | from tqdm import tqdm 8 | 9 | parser = argparse.ArgumentParser() 10 | 11 | parser.add_argument("--dir", help="root directory") 12 | parser.add_argument("--C", type=float, help="discriminator loss coefficient") 13 | parser.add_argument("--left_out_policy", type=str, help="left out policy") 14 | parser.add_argument("--month", type=int, default=None) 15 | parser.add_argument("--year", type=int, default=None) 16 | parser.add_argument("--model_number", type=int, help="saved model epoch number", default=5000) 17 | args = parser.parse_args() 18 | 19 | 20 | def mlp(sizes, activation, output_activation=nn.Identity): 21 | layers = [] 22 | for j in range(len(sizes) - 1): 23 | act = activation if j < len(sizes) - 2 else output_activation 24 | layers += [nn.Linear(sizes[j], sizes[j + 1]), act()] 25 | return nn.Sequential(*layers) 26 | 27 | 28 | class MLP(nn.Module): 29 | 30 | def __init__(self, input_dim, output_dim, hidden_sizes, activation): 31 | super().__init__() 32 | self.predict = mlp(sizes=[input_dim] + list(hidden_sizes) + [output_dim], activation=activation, 33 | output_activation=nn.Identity) 34 | 35 | def forward(self, raw_input): 36 | prediction = self.predict(raw_input) 37 | return prediction 38 | 39 | 40 | def extract(chat_list, chosen_size_list, feature_extractor, chat_mean, chat_std, size_mean, size_std): 41 | extracted_latents = [] 42 | for step in range(len(chat_list)): 43 | chat = chat_list[step] 44 | orig_chosen_size = chosen_size_list[step] 45 | chat_white = (chat - chat_mean) / chat_std 46 | orig_chosen_size_white = (orig_chosen_size - size_mean) / size_std 47 | input_numpy = np.array([chat_white, orig_chosen_size_white]) 48 | input_numpy = np.expand_dims(input_numpy, axis=0) 49 | input_tensor = torch.as_tensor(input_numpy, dtype=torch.float32, device=torch.device('cpu')) 50 | with torch.no_grad(): 51 | feature_tensor = feature_extractor(input_tensor) 52 | 
extracted_feature = feature_tensor.cpu().numpy() 53 | extracted_latents.append(extracted_feature) 54 | return extracted_latents 55 | 56 | 57 | DISCRIMINATOR_EPOCH = 10 58 | C = args.C 59 | left_out_text = f'_{args.left_out_policy}' 60 | PERIOD_TEXT = f'2020-07-27to2021-06-01{left_out_text}' 61 | latent_path = f'{args.dir}{PERIOD_TEXT}_features/inner_loop_{DISCRIMINATOR_EPOCH}/C_{C}/' \ 62 | f'model_{args.model_number}' 63 | os.makedirs(latent_path, exist_ok=True) 64 | 65 | data_path = f'{args.dir}subset_data/{args.left_out_policy}' 66 | chats_mean = np.load(f'{data_path}/chats_mean.npy') 67 | actions_mean = np.load(f'{data_path}/actions_mean.npy') 68 | chats_std = np.load(f'{data_path}/chats_std.npy') 69 | actions_std = np.load(f'{data_path}/actions_std.npy') 70 | 71 | start_date = datetime.date(2020, 7, 27) 72 | end_date = datetime.date(2021, 6, 1) 73 | all_days = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)] 74 | all_days = [day for day in all_days if day not in [datetime.date(2019, 5, 12), datetime.date(2019, 5, 13), 75 | datetime.date(2019, 5, 15), datetime.date(2019, 5, 17), 76 | datetime.date(2019, 5, 18), datetime.date(2019, 5, 19), 77 | datetime.date(2019, 5, 25), datetime.date(2019, 5, 27), 78 | datetime.date(2019, 5, 30), datetime.date(2019, 6, 1), 79 | datetime.date(2019, 6, 2), datetime.date(2019, 6, 3), 80 | datetime.date(2019, 7, 2), datetime.date(2019, 7, 3), 81 | datetime.date(2019, 7, 4), datetime.date(2020, 7, 7), 82 | datetime.date(2020, 7, 8), datetime.date(2021, 6, 2), 83 | datetime.date(2021, 6, 2), datetime.date(2021, 6, 3), 84 | datetime.date(2022, 1, 31), datetime.date(2022, 2, 1), 85 | datetime.date(2022, 2, 2), datetime.date(2022, 2, 3), 86 | datetime.date(2022, 2, 4), datetime.date(2022, 2, 5), 87 | datetime.date(2022, 2, 6), datetime.date(2022, 2, 7)]] 88 | 89 | if args.month is not None and args.year is not None: 90 | all_days = [date for date in all_days if date.month == args.month and date.year == args.year] 91 | model_path = f'{args.dir}{PERIOD_TEXT}_trained_models/inner_loop_{DISCRIMINATOR_EPOCH}/C_{C}' 92 | feature_extractor = torch.load(f"{model_path}/{args.model_number}_feature_extractor.pth", 93 | map_location=torch.device('cpu')).cpu() 94 | cooked_path = f'{args.dir}cooked' 95 | 96 | for today in tqdm(all_days): 97 | date_string = "%d-%02d-%02d" % (today.year, today.month, today.day) 98 | trajs = np.load(f"{cooked_path}/{date_string}_trajs.npy", allow_pickle=True) 99 | latents = [] 100 | for traj in trajs: 101 | features = extract(np.divide(traj[:-1, 7], traj[:-1, 6]), traj[:-1, 7], feature_extractor, chats_mean, 102 | chats_std, actions_mean, actions_std) 103 | latents.append(features) 104 | np.save(f'{latent_path}/{date_string}_features.npy', latents) 105 | del latents -------------------------------------------------------------------------------- /abr-synthetic/create_dataset_and_expertsim.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | import numpy as np 4 | from tqdm import tqdm, trange 5 | from policies import Agent 6 | import argparse 7 | 8 | from env.abr import ABRSimEnv 9 | from policies import get_all_policies 10 | 11 | 12 | def run_trajectories(env: ABRSimEnv, policy_agent: Agent, save_path: str) -> np.ndarray: 13 | # shorthand 14 | num_traces = len(env.all_traces) 15 | len_vid = env.total_num_chunks 16 | size_obs = env.obs_high.shape[0] 17 | 18 | # trajectory to return, each step has obs{of size_obs}, action{of 
size 1}, reward{of size 1} 19 | traj = np.empty((num_traces, len_vid, size_obs + 1 + 1)) 20 | 21 | for trace_index in trange(num_traces): 22 | # Choose specific trace and start from the initial point in the trace 23 | obs = env.reset(trace_choice=trace_index) 24 | 25 | for epi_step in range(len_vid): 26 | # choose action through policy 27 | act = policy_agent.take_action(obs) 28 | 29 | # take action 30 | next_obs, rew, done, info = env.step(act) 31 | if np.abs((next_obs[9] - obs[-6+act])/next_obs[9] * 100) > 1e-8: 32 | import pdb 33 | pdb.set_trace() 34 | assert False 35 | if np.abs((next_obs[4] - obs[-12+act])/next_obs[4] * 100) > 1e-8: 36 | assert False 37 | 38 | # save action 39 | traj[trace_index][epi_step][:size_obs] = obs 40 | traj[trace_index][epi_step][size_obs] = act 41 | traj[trace_index][epi_step][size_obs+1] = rew 42 | 43 | # episode should not finish before video length 44 | assert not done or epi_step == len_vid-1 45 | 46 | # next state 47 | obs = next_obs 48 | 49 | np.save(save_path, traj) 50 | 51 | return traj 52 | 53 | 54 | def run_expert_cf(traj: np.ndarray, cf_path: str, policies: List[Agent], mpc_lookback: int): 55 | if os.path.exists(cf_path): 56 | print(f'CF for {cf_path} already exists, continuing') 57 | raise OSError 58 | chunk_sizes = traj[0, :, 2*mpc_lookback+3:2*mpc_lookback+3+ABRSimEnv.CHOICES] 59 | cf_traj = np.empty((len(policies), traj.shape[0], traj.shape[1], 3)) 60 | one_matched = False 61 | 62 | for p, policy_agent in enumerate(tqdm(policies, leave=False)): 63 | for i in tqdm(range(traj.shape[0]), leave=False): 64 | obs = np.array(traj[i, 0, :-14]) 65 | for j in range(traj.shape[1]-1): 66 | cf_traj[p, i, j, 0] = obs[2*mpc_lookback] 67 | cf_traj[p, i, j, 1] = obs[2*mpc_lookback-1] 68 | cf_traj[p, i, j, 2] = obs[2*mpc_lookback+2] 69 | act = policy_agent.take_action(obs) 70 | 71 | dtime_orig = traj[i, j+1, 2*mpc_lookback-1] 72 | dtime = dtime_orig / chunk_sizes[j, int(traj[i, j, -2])] * chunk_sizes[j, act] 73 | obs[:mpc_lookback] = traj[i, j+1, :mpc_lookback] 74 | obs[mpc_lookback:2*mpc_lookback-1] = obs[mpc_lookback+1:2*mpc_lookback] 75 | obs[2*mpc_lookback-1] = dtime 76 | obs[2*mpc_lookback] = min(max(obs[2*mpc_lookback] - dtime, 0) + ABRSimEnv.CHUNK_LENGTH, 77 | ABRSimEnv.MAX_BUFF_S) 78 | obs[2*mpc_lookback + 1] -= 1 79 | obs[2 * mpc_lookback + 2] = act 80 | obs[2 * mpc_lookback + 2:] = np.array(traj[i, j+1, 2 * mpc_lookback + 2:-14]) 81 | cf_traj[p, i, -1, 0] = obs[2 * mpc_lookback] 82 | cf_traj[p, i, -1, 1] = obs[2 * mpc_lookback - 1] 83 | cf_traj[p, i, -1, 2] = obs[2 * mpc_lookback + 2] 84 | # Must do this to keep seeds consistent 85 | policy_agent.take_action(obs) 86 | 87 | cf = cf_traj[p, :, :] 88 | orig = traj[:, :, [2 * mpc_lookback, 2 * mpc_lookback-1, 2 * mpc_lookback + 2]] 89 | one_matched = one_matched or np.allclose(cf, orig) 90 | 91 | assert one_matched 92 | 93 | np.save(cf_path, cf_traj) 94 | 95 | 96 | def main(): 97 | parser = argparse.ArgumentParser(description='parameters') 98 | 99 | # -- Basic -- 100 | parser.add_argument('--seed', type=int, default=42, help='random seed (default: 42)') 101 | parser.add_argument('--eps', type=float, default=1e-6, help='epsilon (default: 1e-6)') 102 | parser.add_argument('--trace_sim_count', type=int, default=5000, help='Number of generated traces (default: 5000)') 103 | parser.add_argument('--bba_reservoir', type=float, default=5, help='BBA - Reservoir (default: 5)') 104 | parser.add_argument('--bba_cushion', type=float, default=10, help='BBA - Cushion (default: 10)') 105 | 
parser.add_argument('--mpc_lookback', type=int, default=5, help='MPC - Throughput lookback (default: 5)') 106 | parser.add_argument('--mpc_lookahead', type=int, default=5, help='MPC - Throughput lookahead (default: 5)') 107 | parser.add_argument('--dir', type=str, required=True, help='Output folder') 108 | 109 | config = parser.parse_args() 110 | 111 | # Create output folder 112 | os.makedirs(config.dir, exist_ok=True) 113 | 114 | # set up environments for workers 115 | print('Setting up environment..') 116 | env = ABRSimEnv(mpc_lookahead=config.mpc_lookahead, mpc_lookback=config.mpc_lookback, seed=config.seed, 117 | trace_folder=config.dir, num_traces=config.trace_sim_count) 118 | 119 | for pol, name, path in zip(*get_all_policies(config)): 120 | print(f'Starting {name}..') 121 | traj = run_trajectories(env, pol, f'{config.dir}/{path}') 122 | run_expert_cf(traj, f'{config.dir}/cf_{path}', get_all_policies(config)[0], config.mpc_lookback) 123 | 124 | print('DONE') 125 | 126 | 127 | if __name__ == '__main__': 128 | main() 129 | -------------------------------------------------------------------------------- /load-balancing/main_slsim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | import os 4 | from causalsim import train_causal_sim 5 | from slsim import train_slsim 6 | import argparse 7 | from generate_cfs import generate_cfs 8 | import matplotlib.pyplot as plt 9 | import torch 10 | 11 | NO_POLICIES = 16 12 | POLICIES = [ 13 | "random", 14 | "Shortest queue", 15 | "pow2", 16 | "pow3", 17 | "pow4", 18 | "pow5", 19 | "PseudoOptimal", 20 | "Tracker", 21 | ] 22 | 23 | 24 | DATAPATH = "non_iid_0_big.pkl" 25 | torch.manual_seed(0) 26 | 27 | 28 | def cdf(x, plot=True, *args, **kwargs): 29 | x = sorted(x)[:] 30 | x, y = sorted(x), np.arange(len(x)) / len(x) 31 | return plt.plot(x, 100 * y, *args, **kwargs) if plot else (x, y) 32 | 33 | 34 | def get_mape(truth, estimate, policy_assignment): 35 | policies = np.unique(policy_assignment) 36 | mapes = [] 37 | for p in policies: 38 | truth_p = truth[policy_assignment == p, :] 39 | estimate_p = estimate[policy_assignment == p, :] 40 | mape_p = 100 * (np.abs(estimate_p - truth_p) / truth_p).mean() 41 | mapes.append(mape_p) 42 | return mapes 43 | 44 | 45 | def load_and_create_datasets(dict_exp, policy_out, dir_out, loss): 46 | policy_out_idx = POLICIES.index(policy_out) 47 | 48 | actions = dict_exp["actions"] 49 | processing_times = dict_exp["proc_times"] 50 | no_traj, T = processing_times.shape[1], processing_times.shape[2] 51 | policies_range = np.arange(NO_POLICIES) 52 | policies_range = np.delete(policies_range, [policy_out_idx]) 53 | numbers_policies = np.random.choice(policies_range, size=no_traj).astype(int) 54 | data_all = np.zeros([no_traj, T, 2]) 55 | processing_times_list = [] 56 | numbers = [] 57 | actions_list = [] 58 | for i, policy in enumerate(policies_range): 59 | actions_policy = actions[policy, numbers_policies == policy, :] 60 | pt_policy = processing_times[policy, numbers_policies == policy, :] 61 | data_all[numbers_policies == policy, :, 0] = actions_policy 62 | data_all[numbers_policies == policy, :, 1] = pt_policy 63 | processing_times_list.extend(pt_policy.flatten()) 64 | actions_list.extend(actions_policy.flatten()) 65 | numbers.extend([i for _ in range(actions_policy.flatten().shape[0])]) 66 | file_name = f"{policy_out}_{loss}" 67 | savepath = f"{dir_out}/train_data_{file_name}/" 68 | try: 69 | os.makedirs(savepath) 70 | except: 71 | pass 72 | 73 | 
np.save(f"{savepath}/train_trajectories.npy", data_all) 74 | max_pol = max(numbers) 75 | train_indices = [i for i in range(len(numbers)) if numbers[i] <= max_pol] 76 | 77 | train_pt = np.array(processing_times_list)[train_indices] 78 | train_actions = np.array(actions_list)[train_indices].astype(int) 79 | train_numbers = np.array(numbers)[train_indices] 80 | 81 | np.save(f"{savepath}/raw_train_pt.npy", np.array(train_pt)) 82 | np.save(f"{savepath}/raw_train_actions.npy", np.array(train_actions)) 83 | np.save(f"{savepath}/raw_train_numbers.npy", np.array(train_numbers)) 84 | 85 | np.save(f"{savepath}/policy_assignment.npy", np.array(numbers_policies)) 86 | 87 | # train_pt = train_pt - np.mean(train_pt) 88 | train_pt = train_pt / np.std(train_pt) 89 | 90 | train_actions_onehot = np.zeros([train_actions.shape[0], int(actions.max()) + 1]) 91 | 92 | train_actions_onehot[np.arange(train_actions.shape[0]), train_actions] = 1 93 | 94 | train_inputs = np.concatenate( 95 | (np.expand_dims(train_pt, axis=1), train_actions_onehot), axis=1 96 | ) 97 | train_outputs = np.concatenate( 98 | (np.expand_dims(train_pt, axis=1), np.expand_dims(train_numbers, axis=1)), 99 | axis=1, 100 | ) 101 | np.save(f"{savepath}/white_train_inputs.npy", train_inputs) 102 | np.save(f"{savepath}/white_train_outputs.npy", train_outputs) 103 | assert len(train_inputs) == len(train_outputs) 104 | return savepath 105 | 106 | 107 | def parse_args(): 108 | parser = argparse.ArgumentParser(description="main parser") 109 | parser.add_argument( 110 | "--dir", type=str, required=True, help="Load Balance dataset directory" 111 | ) 112 | parser.add_argument( 113 | "--policy_out", 114 | type=str, 115 | default="random", 116 | help=f"choose test policy from {POLICIES}", 117 | ) 118 | parser.add_argument( 119 | "--loss", 120 | type=str, 121 | default="mse_loss", 122 | help="choose the loss from mse_loss, l1_loss, huber_loss", 123 | ) 124 | return parser.parse_args() 125 | 126 | 127 | def main(): 128 | # get parameters 129 | args = parse_args() 130 | with open(f"{args.dir}/{DATAPATH}", "rb") as fandle: 131 | dict_exp = pickle.load(fandle) 132 | 133 | policy_index = POLICIES.index(args.policy_out) 134 | # generate data wihtout the test policy 135 | print("GENERATE DATASETS .. ") 136 | generated_datapath = load_and_create_datasets( 137 | dict_exp, args.policy_out, args.dir, args.loss 138 | ) 139 | 140 | print("TRAIN SLSIM .. 
") 141 | # train direct 142 | train_slsim( 143 | generated_datapath, 144 | models_path=f"{args.dir}/models/{args.policy_out}/{args.loss}", 145 | loss=args.loss, 146 | ) 147 | print("GENERATE SLSIM COUNTERFACTUALS") 148 | 149 | alg = "slsim" 150 | cf_slsim, features_slsim = generate_cfs( 151 | dict_exp, 152 | generated_datapath, 153 | models_path=f"{args.dir}/models/{args.policy_out}/{args.loss}/{alg}/", 154 | test_policy_idx=None, 155 | alg=alg, 156 | N_test=5000, 157 | ) 158 | np.save(f"cfs/cf_{args.policy_out}_{args.loss}.npy", cf_slsim) 159 | print("Plotting results") 160 | policy_assignment = np.load(f"{generated_datapath}/policy_assignment.npy") 161 | truth_processing_time = dict_exp["proc_times"] 162 | 163 | MAPE_slsim = get_mape( 164 | truth_processing_time[policy_index, :, :], 165 | cf_slsim[0, ..., 0], 166 | policy_assignment, 167 | ) 168 | os.makedirs("figures", exist_ok=True) 169 | 170 | plt.figure() 171 | plt.title("processing time") 172 | cdf(MAPE_slsim, label="Direct") 173 | plt.xlabel("Processing time MAPE") 174 | plt.ylabel("CDF(%)") 175 | plt.legend() 176 | plt.savefig(f"figures/processing_time_MAPE_{args.policy_out}.png") 177 | 178 | truth_latency = dict_exp["latencies"] 179 | 180 | MAPE_slsim = get_mape( 181 | truth_latency[policy_index, :, :], 182 | cf_slsim[0, ..., 1], 183 | policy_assignment, 184 | ) 185 | 186 | plt.figure() 187 | plt.title("latency") 188 | cdf(MAPE_slsim, label="Direct") 189 | plt.xlabel("Latency MAPE") 190 | plt.ylabel("CDF(%)") 191 | plt.legend() 192 | plt.savefig(f"figures/latency_MAPE_{args.policy_out}.png") 193 | 194 | 195 | if __name__ == "__main__": 196 | main() 197 | -------------------------------------------------------------------------------- /abr-puffer/training/train_subset.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.utils.tensorboard import SummaryWriter 6 | import os 7 | import argparse 8 | from tqdm import tqdm 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("--dir", help="root directory") 12 | parser.add_argument("--C", type=float, help="discriminator loss coefficient") 13 | parser.add_argument("--left_out_policy", type=str, help="left out policy") 14 | parser.add_argument("--device", type=str, help="Compute device", default='cuda:0') 15 | parser.add_argument("--batch_size", type=int, default=17) 16 | args = parser.parse_args() 17 | 18 | torch.manual_seed(10) 19 | np.random.seed(10) 20 | 21 | BATCH_SIZE = 2 ** args.batch_size 22 | DISCRIMINATOR_EPOCH = 10 23 | C = args.C 24 | device = torch.device(args.device) 25 | left_out_text = f'_{args.left_out_policy}' 26 | PERIOD_TEXT = f'2020-07-27to2021-06-01{left_out_text}' 27 | 28 | 29 | def mlp(sizes, activation, output_activation=nn.Identity): 30 | layers = [] 31 | for j in range(len(sizes) - 1): 32 | act = activation if j < len(sizes) - 2 else output_activation 33 | layers += [nn.Linear(sizes[j], sizes[j + 1]), act()] 34 | return nn.Sequential(*layers) 35 | 36 | 37 | class MLP(nn.Module): 38 | 39 | def __init__(self, input_dim, output_dim, hidden_sizes, activation): 40 | super().__init__() 41 | self.predict = mlp(sizes=[input_dim] + list(hidden_sizes) + [output_dim], activation=activation, 42 | output_activation=nn.Identity) 43 | 44 | def forward(self, raw_input): 45 | prediction = self.predict(raw_input) 46 | return prediction 47 | 48 | 49 | new_path = 
f'{args.dir}{PERIOD_TEXT}_trained_models/inner_loop_{DISCRIMINATOR_EPOCH}/C_{C}' 50 | os.makedirs(new_path, exist_ok=True) 51 | data_dir = f'{args.dir}subset_data/{args.left_out_policy}' 52 | dts = np.load(f'{data_dir}/white_dts.npy') 53 | buffs = np.load(f'{data_dir}/white_buffs.npy') 54 | next_buffs = np.load(f'{data_dir}/white_next_buffs.npy') 55 | policy_numbers = np.load(f'{data_dir}/policy_numbers.npy') 56 | chats = np.load(f'{data_dir}/white_chats.npy') 57 | actions = np.load(f'{data_dir}/white_actions.npy') 58 | data_size = len(chats) 59 | dt_tensors = torch.as_tensor(dts, dtype=torch.float32, device=device) 60 | del dts 61 | buff_tensors = torch.as_tensor(buffs, dtype=torch.float32, device=device) 62 | next_buff_tensors = torch.as_tensor(next_buffs, dtype=torch.float32, device=device) 63 | del buffs, next_buffs 64 | chat_tensors = torch.as_tensor(chats, dtype=torch.float32, device=device) 65 | action_tensors = torch.as_tensor(actions, dtype=torch.float32, device=device) 66 | policy_number_tensors = torch.as_tensor(policy_numbers, dtype=torch.long, device=device) 67 | del chats, actions, policy_numbers 68 | 69 | feature_extractor = MLP(input_dim=2, output_dim=1, hidden_sizes=[128, 128], activation=nn.ReLU).to(device=device) 70 | predictor = MLP(input_dim=3, output_dim=2, hidden_sizes=[128, 128], activation=nn.ReLU).to(device=device) 71 | discriminator = MLP(input_dim=1, output_dim=5, hidden_sizes=[128, 128], activation=nn.ReLU).to(device=device) 72 | huber_loss = nn.HuberLoss(delta=0.2) 73 | ce_loss = nn.CrossEntropyLoss() 74 | feature_extractor_optimizer = torch.optim.Adam(feature_extractor.parameters()) 75 | predictor_optimizer = torch.optim.Adam(predictor.parameters()) 76 | discriminator_optimizer = torch.optim.Adam(discriminator.parameters()) 77 | writer_train = SummaryWriter(log_dir=f"{args.dir}logs/subset_{args.left_out_policy}/" 78 | f"inner_loop_{DISCRIMINATOR_EPOCH}/C_{C}") 79 | for epoch in tqdm(range(5000)): 80 | t1 = time.time() 81 | # Discriminator inner training loop: 82 | train_loss_list = [] 83 | for discriminator_epoch in range(DISCRIMINATOR_EPOCH): 84 | discriminator_optimizer.zero_grad() 85 | idx = np.random.choice(data_size, size=BATCH_SIZE) 86 | feature_tensors = feature_extractor( 87 | torch.cat((chat_tensors[idx].unsqueeze(1), action_tensors[idx].unsqueeze(1)), dim=1)) 88 | policy_gt_tensors = policy_number_tensors[idx] 89 | discriminated_tensors = discriminator(feature_tensors) 90 | assert discriminated_tensors.shape[0] == policy_gt_tensors.shape[0] 91 | discriminator_loss = ce_loss(discriminated_tensors, policy_gt_tensors) 92 | discriminator_loss.backward() 93 | discriminator_optimizer.step() 94 | train_loss_list.append(discriminator_loss.cpu().detach().item()) 95 | writer_train.add_scalar("discriminator_loss", min(train_loss_list), epoch) 96 | writer_train.add_scalar("training/elapsed_disc", time.time() - t1, epoch) 97 | 98 | t1 = time.time() 99 | # Predictor training: 100 | idx = np.random.choice(data_size, size=BATCH_SIZE) 101 | feature_extractor_optimizer.zero_grad() 102 | predictor_optimizer.zero_grad() 103 | feature_tensors = feature_extractor( 104 | torch.cat((chat_tensors[idx].unsqueeze(1), action_tensors[idx].unsqueeze(1)), dim=1)) 105 | pred_tensors = predictor( 106 | torch.cat((buff_tensors[idx].unsqueeze(1), action_tensors[idx].unsqueeze(1), feature_tensors), dim=1)) 107 | dt_pred_tensors = pred_tensors[:, 1:2] 108 | dt_gt_tensors = dt_tensors[idx].unsqueeze(1) 109 | buff_pred_tensors = pred_tensors[:, 0:1] 110 | buff_gt_tensors = 
next_buff_tensors[idx].unsqueeze(1) 111 | assert buff_pred_tensors.shape == buff_gt_tensors.shape, f'{buff_pred_tensors.shape}, {buff_gt_tensors.shape}' 112 | assert dt_pred_tensors.shape == dt_gt_tensors.shape, f'{dt_pred_tensors.shape}, {dt_gt_tensors.shape}' 113 | buff_pred_loss = huber_loss(buff_pred_tensors, buff_gt_tensors) 114 | dt_pred_loss = huber_loss(dt_pred_tensors, dt_gt_tensors) 115 | pred_loss = (dt_pred_loss + buff_pred_loss) / 2 116 | discriminated_tensors = discriminator(feature_tensors) 117 | policy_gt_tensors = policy_number_tensors[idx] 118 | assert policy_gt_tensors.shape[0] == discriminated_tensors.shape[0] 119 | fool_loss = ce_loss(discriminated_tensors, policy_gt_tensors) 120 | total_loss = pred_loss - C * fool_loss 121 | writer_train.add_scalar("predictor_loss/dt_prediction", dt_pred_loss.cpu().detach().item(), epoch) 122 | writer_train.add_scalar("predictor_loss/buff_prediction", buff_pred_loss.cpu().detach().item(), epoch) 123 | writer_train.add_scalar("predictor_loss/discriminator", fool_loss.cpu().detach().item(), epoch) 124 | writer_train.add_scalar("predictor_loss/total", total_loss.cpu().detach().item(), epoch) 125 | writer_train.add_scalar("training/elapsed_pred", time.time() - t1, epoch) 126 | total_loss.backward() 127 | feature_extractor_optimizer.step() 128 | predictor_optimizer.step() 129 | if epoch % 100 == 99: 130 | torch.save(feature_extractor, f'{new_path}/{epoch + 1}_feature_extractor.pth') 131 | torch.save(predictor, f'{new_path}/{epoch + 1}_predictor.pth') 132 | torch.save(discriminator, f'{new_path}/{epoch + 1}_discriminator.pth') 133 | -------------------------------------------------------------------------------- /abr-puffer/visualization/7b.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pickle 3 | import numpy as np 4 | import argparse 5 | import os 6 | import datetime 7 | from tqdm import tqdm 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--dir", help="root directory") 11 | args = parser.parse_args() 12 | 13 | source_policies = ['bola_basic_v2', 'bola_basic_v1', 'puffer_ttp_cl', 'puffer_ttp_20190202', 'linear_bba'] 14 | target_policies = ['bola_basic_v2', 'bola_basic_v1', 'linear_bba'] 15 | 16 | mape_dict = {source: {target: {'diff': 0, 'number': 0, 'average': 0, 'mad': 0} for target in target_policies} for 17 | source in source_policies} 18 | start_date = datetime.date(2020, 7, 27) 19 | end_date = datetime.date(2021, 6, 1) 20 | all_days = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)] 21 | all_days = [day for day in all_days if day not in [datetime.date(2019, 5, 12), datetime.date(2019, 5, 13), 22 | datetime.date(2019, 5, 15), datetime.date(2019, 5, 17), 23 | datetime.date(2019, 5, 18), datetime.date(2019, 5, 19), 24 | datetime.date(2019, 5, 25), datetime.date(2019, 5, 27), 25 | datetime.date(2019, 5, 30), datetime.date(2019, 6, 1), 26 | datetime.date(2019, 6, 2), datetime.date(2019, 6, 3), 27 | datetime.date(2019, 7, 2), datetime.date(2019, 7, 3), 28 | datetime.date(2019, 7, 4), datetime.date(2020, 7, 7), 29 | datetime.date(2020, 7, 8), datetime.date(2021, 6, 2), 30 | datetime.date(2021, 6, 2), datetime.date(2021, 6, 3), 31 | datetime.date(2022, 1, 31), datetime.date(2022, 2, 1), 32 | datetime.date(2022, 2, 2), datetime.date(2022, 2, 3), 33 | datetime.date(2022, 2, 4), datetime.date(2022, 2, 5), 34 | datetime.date(2022, 2, 6), datetime.date(2022, 2, 7)]] 35 | cooked_path = 
f'{args.dir}cooked' 36 | for target in target_policies: 37 | if target == 'linear_bba': 38 | name = 'linear_bba' 39 | elif target == 'bola_basic_v1': 40 | name = 'bola1' 41 | elif target == 'bola_basic_v2': 42 | name = 'bola2' 43 | PERIOD_TEXT = f'2020-07-27to2021-06-01_{target}' 44 | sl_path = f'{args.dir}{PERIOD_TEXT}_sl_cfs/cfs/model_10000' 45 | for today in tqdm(all_days): 46 | date_string = "%d-%02d-%02d" % (today.year, today.month, today.day) 47 | ids = np.load(f'{cooked_path}/{date_string}_ids_translated.npy', allow_pickle=True) 48 | orig_trajs = np.load(f'{cooked_path}/{date_string}_trajs.npy', allow_pickle=True) 49 | counterfactual_actions = np.load(f'{sl_path}/{date_string}_{name}_actions.npy', allow_pickle=True) 50 | for idx, action_trajectory in enumerate(counterfactual_actions): 51 | factual_actions = orig_trajs[idx][:-1, 7] 52 | assert len(action_trajectory) == len(factual_actions), f'{len(action_trajectory)}, {factual_actions.shape}' 53 | source = ids[idx] 54 | mape_dict[source][target]['number'] += len(action_trajectory) 55 | mape_dict[source][target]['diff'] += np.sum( 56 | np.abs((action_trajectory - factual_actions) / factual_actions * 100)) 57 | mape_dict[source][target]['mad'] += np.sum(np.abs(action_trajectory - factual_actions)) 58 | for source in source_policies: 59 | for target in target_policies: 60 | mape_dict[source][target]['average'] = mape_dict[source][target]['diff'] / mape_dict[source][target]['number'] 61 | mape_dict[source][target]['mad'] = mape_dict[source][target]['mad'] / mape_dict[source][target]['number'] 62 | del mape_dict[source][target]['diff'] 63 | del mape_dict[source][target]['number'] 64 | with open(f'{args.dir}tuned_hyperparams/buffer.pkl', 'rb') as f: 65 | f = pickle.load(f) 66 | Cs = [f[policy][1] for policy in target_policies] 67 | data = {source: {target: {'sim_EMD': None, 'sl_EMD': None, 'expert_EMD': None, 'mape': None, 'mad': None} for target in 68 | target_policies} for source in source_policies} 69 | for index, left_out_policy in enumerate(target_policies): 70 | with open(f'{args.dir}subset_EMDs/{left_out_policy}/sim_buff_{Cs[index]}.pkl', 'rb') as f: 71 | sim_dict = pickle.load(f) 72 | with open(f'{args.dir}subset_EMDs/{left_out_policy}/sl_buff_{Cs[index]}.pkl', 'rb') as f: 73 | sl_dict = pickle.load(f) 74 | with open(f'{args.dir}subset_EMDs/{left_out_policy}/expert_buff_{Cs[index]}.pkl', 'rb') as f: 75 | expert_dict = pickle.load(f) 76 | for source in source_policies: 77 | data[source][left_out_policy]['sim_EMD'] = sim_dict[source][left_out_policy] 78 | data[source][left_out_policy]['sl_EMD'] = sl_dict[source][left_out_policy] 79 | data[source][left_out_policy]['expert_EMD'] = expert_dict[source][left_out_policy] 80 | data[source][left_out_policy]['mape'] = mape_dict[source][left_out_policy]['average'] 81 | data[source][left_out_policy]['mad'] = mape_dict[source][left_out_policy]['mad'] * 8 / 1e6 82 | 83 | plt.figure(figsize=(3.25, 2.25)) 84 | x = np.array( 85 | [data[source][left_out]['mad'] for source in source_policies for index, left_out in enumerate(target_policies)]) 86 | y = np.array( 87 | [data[source][left_out]['sim_EMD'] for source in source_policies for index, left_out in enumerate(target_policies)]) 88 | r = np.polyfit(x, y, deg=1) 89 | plt.plot([np.min(x), np.max(x)], [r[0] * np.min(x) + r[1], r[0] * np.max(x) + r[1]], label='CausalSim', color='green') 90 | y = np.array([data[source][left_out]['expert_EMD'] for source in source_policies for index, left_out in 91 | enumerate(target_policies)]) 92 | r = np.polyfit(x, y, 
deg=1) 93 | plt.plot([np.min(x), np.max(x)], [r[0] * np.min(x) + r[1], r[0] * np.max(x) + r[1]], label='ExpertSim', color='blue') 94 | y = np.array( 95 | [data[source][left_out]['sl_EMD'] for source in source_policies for index, left_out in enumerate(target_policies)]) 96 | r = np.polyfit(x, y, deg=1) 97 | plt.plot([np.min(x), np.max(x)], [r[0] * np.min(x) + r[1], r[0] * np.max(x) + r[1]], label='SLSim', color='red') 98 | plt.legend() 99 | for source in source_policies: 100 | for index, left_out in enumerate(target_policies): 101 | plt.scatter(data[source][left_out]['mad'], data[source][left_out]['sim_EMD'], label='CausalSim', color='green', 102 | marker='>') 103 | plt.scatter(data[source][left_out]['mad'], data[source][left_out]['expert_EMD'], label='ExpertSim', 104 | color='blue', marker='s') 105 | plt.scatter(data[source][left_out]['mad'], data[source][left_out]['sl_EMD'], label='SLSim', color='red', 106 | marker='o') 107 | 108 | fig_path = f'{args.dir}plots' 109 | os.makedirs(fig_path, exist_ok=True) 110 | plt.savefig(f'{fig_path}/fig7b.pdf', format='pdf') -------------------------------------------------------------------------------- /load-balancing/causalsim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.utils.tensorboard import SummaryWriter 5 | import os 6 | from tqdm import tqdm 7 | from sklearn.model_selection import train_test_split 8 | from nn_util import MLP 9 | 10 | BATCH_SIZE = 2**13 # Maximum without exceeding GPU memory limit 11 | 12 | 13 | def train_causal_sim( 14 | datapath, 15 | kappa, 16 | no_policies, 17 | r=1, 18 | DISCRIMINATOR_EPOCH=10, 19 | models_path="models", 20 | BATCH_SIZE=2**13, 21 | N=int(5 * 1e6), 22 | ): 23 | path_models = f"{models_path}/causalsim/" 24 | 25 | try: 26 | os.makedirs(path_models) 27 | except: 28 | pass 29 | log_path = f"{path_models}/logs" 30 | try: 31 | os.makedirs(log_path) 32 | except: 33 | pass 34 | 35 | if torch.cuda.is_available(): 36 | device = torch.device(f"cuda:0") 37 | else: 38 | device = torch.device(f"cpu") 39 | inputs_train = np.load(f"{datapath}/white_train_inputs.npy")[:N, :] # 40 | # [:, (processing_time, one hot encoder for action (8))] 41 | 42 | outputs_train = np.load(f"{datapath}/white_train_outputs.npy")[:N, :] 43 | # [:, (processing_time, policy_label)] 44 | 45 | val_size = int(inputs_train.shape[0] * 0.1) 46 | train_idx, val_idx = train_test_split( 47 | np.arange(len(inputs_train)), test_size=val_size, train_size=None 48 | ) 49 | 50 | train_input_tensors = torch.as_tensor( 51 | inputs_train[train_idx], dtype=torch.float32, device=device 52 | ) 53 | train_output_tensors = torch.as_tensor( 54 | outputs_train[train_idx], dtype=torch.float32, device=device 55 | ) 56 | 57 | val_input_tensors = torch.as_tensor( 58 | inputs_train[val_idx], dtype=torch.float32, device=device 59 | ) 60 | val_output_tensors = torch.as_tensor( 61 | outputs_train[val_idx], dtype=torch.float32, device=device 62 | ) 63 | 64 | feature_extractor = MLP( 65 | input_dim=9, output_dim=r, hidden_sizes=[64, 64], activation=nn.ReLU 66 | ).to(device) 67 | action_factor_net = MLP( 68 | input_dim=8, output_dim=r, hidden_sizes=[], activation=nn.ReLU 69 | ).to(device) 70 | 71 | ## discriminator input 72 | discriminator1 = MLP( 73 | input_dim=r, output_dim=no_policies, hidden_sizes=[128, 128], activation=nn.ReLU 74 | ).to(device) 75 | mse_loss = nn.MSELoss() 76 | ce_loss = nn.CrossEntropyLoss() 77 | action_factor_optimizer = torch.optim.Adam( 
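# PyTorch's default Adam hyper-parameters (lr=1e-3, betas=(0.9, 0.999)) are
# used here and for the feature-extractor and discriminator optimizers below.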
75 | mse_loss = nn.MSELoss() 76 | ce_loss = nn.CrossEntropyLoss() 77 | action_factor_optimizer = torch.optim.Adam( 78 | action_factor_net.parameters(), 79 | ) 80 | feature_extractor_optimizer = torch.optim.Adam( 81 | feature_extractor.parameters(), 82 | ) 83 | discriminator1_optimizer = torch.optim.Adam( 84 | discriminator1.parameters(), 85 | ) 86 | 87 | writer_train = SummaryWriter( 88 | log_dir=f"{log_path}/inner_loop_%d/kappa_%d/training" 89 | % (DISCRIMINATOR_EPOCH, kappa) 90 | ) 91 | best_loss = np.inf 92 | for epoch in tqdm(range(10000)): 93 | 94 | # Discriminator inner training loop: 95 | train_loss_list = [] 96 | for discriminator_epoch in range(DISCRIMINATOR_EPOCH + 1): 97 | idx = np.random.choice(np.arange(len(train_input_tensors)), size=BATCH_SIZE) 98 | batch_input_tensors = train_input_tensors[idx, :] 99 | batch_output_tensors1 = train_output_tensors[idx, 1] 100 | discriminator1_optimizer.zero_grad() 101 | feature_tensors = feature_extractor(batch_input_tensors) 102 | discriminated1_tensors = discriminator1(feature_tensors) 103 | discriminator1_loss = ce_loss( 104 | discriminated1_tensors, batch_output_tensors1.long() 105 | ) 106 | discriminator_loss = discriminator1_loss 107 | discriminator_loss.backward() 108 | discriminator1_optimizer.step() 109 | train_loss_list.append(discriminator_loss.cpu().detach().numpy()) 110 | 111 | writer_train.add_scalar("discriminators_loss", min(train_loss_list), epoch) 112 | 113 | # mediator predictor training: 114 | idx = np.random.choice(np.arange(len(train_input_tensors)), size=BATCH_SIZE) 115 | batch_input_tensors = train_input_tensors[idx] 116 | batch_output_tensors = train_output_tensors[idx] 117 | feature_extractor_optimizer.zero_grad() 118 | action_factor_optimizer.zero_grad() 119 | 120 | action_factor = action_factor_net(batch_input_tensors[:, 1:]) 121 | feature_tensors = feature_extractor(batch_input_tensors) 122 | discriminated_tensors = discriminator1(feature_tensors) 123 | 124 | predicted_pt = torch.mul(feature_tensors, action_factor) 125 | predicted_pt = torch.matmul( 126 | predicted_pt, torch.ones([r, 1], dtype=torch.float32, device=device) 127 | ) 128 | 129 | pred_loss = mse_loss(predicted_pt, batch_output_tensors[:, :1]) 130 | fool_loss = ce_loss(discriminated_tensors, batch_output_tensors[:, 1].long()) 131 | total_loss = pred_loss - kappa * fool_loss 132 | writer_train.add_scalar( 133 | "predictor_loss/prediction", pred_loss.cpu().detach().numpy(), epoch 134 | ) 135 | writer_train.add_scalar( 136 | "predictor_loss/discriminator", fool_loss.cpu().detach().numpy(), epoch 137 | ) 138 | writer_train.add_scalar( 139 | "predictor_loss/total", total_loss.cpu().detach().numpy(), epoch 140 | ) 141 | total_loss.backward() 142 | action_factor_optimizer.step() 143 | feature_extractor_optimizer.step() 144 |
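(As a reading aid, our paraphrase of the loop above rather than anything stated in the repository: each outer epoch first fits the discriminator $D_\psi$ to classify which policy $\pi$ produced each latent feature, then updates the feature extractor $f_\phi$ and action-factor net $g_\theta$ to minimize prediction error while maximizing the discriminator's loss:)

```latex
\min_{\psi}\ \mathrm{CE}\big(D_{\psi}(f_{\phi}(x)),\,\pi\big)
\qquad\text{then}\qquad
\min_{\phi,\theta}\ \underbrace{\big(\langle f_{\phi}(x),\,g_{\theta}(a)\rangle - y\big)^{2}}_{\texttt{pred\_loss}}
\,-\,\kappa\,\underbrace{\mathrm{CE}\big(D_{\psi}(f_{\phi}(x)),\,\pi\big)}_{\texttt{fool\_loss}}
```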
145 | if epoch % 1000 == 999: 146 | batch_input_tensors = val_input_tensors[:] 147 | batch_output_tensors = val_output_tensors[:] 148 | 149 | action_factor = action_factor_net(batch_input_tensors[:, 1:]) 150 | feature_tensors = feature_extractor(batch_input_tensors) 151 | discriminated_tensors = discriminator1(feature_tensors) 152 | 153 | predicted_pt = torch.mul(feature_tensors, action_factor) 154 | predicted_pt = torch.matmul( 155 | predicted_pt, torch.ones([r, 1], dtype=torch.float32, device=device) 156 | ) 157 | 158 | pred_loss = mse_loss(predicted_pt, batch_output_tensors[:, :1]) 159 | fool_loss = ce_loss( 160 | discriminated_tensors, batch_output_tensors[:, 1].long() 161 | ) 162 | total_loss = pred_loss - kappa * fool_loss 163 | 164 | print( 165 | f"Val loss: epoch {epoch}, total loss {total_loss.cpu().detach().numpy()}, disc_loss {fool_loss.cpu().detach().numpy()}" 166 | ) 167 | if best_loss > total_loss: 168 | best_loss = total_loss 169 | print(f"saving ... best loss: {best_loss}") 170 | torch.save( 171 | feature_extractor, 172 | f"{path_models}/best_feature_extractor.pth", 173 | ) 174 | torch.save( 175 | action_factor_net, f"{path_models}/best_action_factor.pth" 176 | ) 177 | torch.save(discriminator1, f"{path_models}/best_discriminator.pth") 178 | -------------------------------------------------------------------------------- /abr-puffer/README.md: -------------------------------------------------------------------------------- 1 | # Real-world Adaptive BitRate (ABR) simulation with CausalSim 2 | 3 | To reproduce the results in the paper, we need to: 4 | 5 | 0. Install required Python packages. 6 | 1. Download and prepare the dataset (~40GB). 7 | 2. Train CausalSim and SLSim models. 8 | 3. Carry out counterfactual simulations with these models and tune hyper-parameters. 9 | 4. Plot full-scale simulation results. 10 | 11 | *** 12 | ## 0. Python Packages 13 | We use Python (3.8 tested) for all experiments. Install the following packages via `pip` or `conda`: 14 | ``` 15 | numpy, pandas, tqdm, matplotlib, scikit-learn 16 | ``` 17 | Install PyTorch according to the website [instructions](https://pytorch.org). 18 | 19 | --- 20 | ## 1. Preparing the Dataset 21 | 22 | First, create a directory as a workspace for datasets, models, simulations and plots. We'll call this directory CAUSALSIM_DIR. 23 | Next, run the following command: 24 | ``` 25 | python3 data_preparation/create_dataset.py --dir CAUSALSIM_DIR 26 | ``` 27 | This script will download stream logs from [the puffer website](https://puffer.stanford.edu). It will then filter them 28 | according to the [puffer-statistics](https://github.com/StanfordSNR/puffer-statistics) definition of `slow streams`. 29 | The dataset is saved in `CAUSALSIM_DIR/cooked`. 30 | 31 | To normalize the data and prepare it for training, run the following script: 32 | ``` 33 | python data_preparation/generate_subset_data.py --dir CAUSALSIM_DIR 34 | ``` 35 | --- 36 | ## 2. Training 37 | ### Using the pre-trained checkpoints 38 | We provide the trained checkpoints used in the paper in [assets](https://github.com/CausalSim/Unbiased-Trace-Driven-Simulation/tree/master/abr-puffer/assets). 39 | To use them, copy everything inside [assets](https://github.com/CausalSim/Unbiased-Trace-Driven-Simulation/tree/master/abr-puffer/assets) 40 | to `CAUSALSIM_DIR` and proceed to the next step (3. Counterfactual Simulation). 41 | Unfortunately, we used random seeds for training these models, so training from scratch might produce different models. 42 | To reproduce the exact results in the paper, use the pretrained checkpoints. 43 | 44 | ### Training from scratch 45 | For each choice of `target` from `[linear_bba, bola_basic_v1, bola_basic_v2]`, train the corresponding SLSim model with 46 | the following script: 47 | ``` 48 | python training/sl_subset_train.py --dir CAUSALSIM_DIR --left_out_policy target 49 | ``` 50 | Similarly, for each choice of `target` from `[linear_bba, bola_basic_v1, bola_basic_v2]` and `Kappa` 51 | (loss hyper-parameter) from `[0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 40.0]`, train the corresponding 52 | CausalSim model with the following script (a small driver for the whole sweep is sketched right after this block): 53 | ``` 54 | python training/train_subset.py --dir CAUSALSIM_DIR --left_out_policy target --C Kappa 55 | ```
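(For convenience, the whole sweep can be scripted; a minimal Python driver, ours rather than the repository's, assuming it runs from `abr-puffer/` with `CAUSALSIM_DIR` substituted for a real path:)

```python
import subprocess

CAUSALSIM_DIR = "/path/to/causalsim_dir"  # placeholder workspace path
TARGETS = ["linear_bba", "bola_basic_v1", "bola_basic_v2"]
KAPPAS = [0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 40.0]

for target in TARGETS:
    # One SLSim model per left-out policy ...
    subprocess.run(["python", "training/sl_subset_train.py",
                    "--dir", CAUSALSIM_DIR, "--left_out_policy", target], check=True)
    # ... and one CausalSim model per (left-out policy, Kappa) pair.
    for kappa in KAPPAS:
        subprocess.run(["python", "training/train_subset.py",
                        "--dir", CAUSALSIM_DIR, "--left_out_policy", target,
                        "--C", str(kappa)], check=True)
```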
56 | --- 57 | ## 3. Counterfactual Simulation and Hyper-parameter Tuning 58 | For each choice of `target` from `[linear_bba, bola_basic_v1, bola_basic_v2]` and `Kappa` 59 | (loss hyper-parameter) from `[0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 40.0]`, extract and save the 60 | latent factors using the corresponding CausalSim model with the following script: 61 | ``` 62 | python inference/extract_subset_latents.py --dir CAUSALSIM_DIR --left_out_policy target --C Kappa 63 | ``` 64 | To do counterfactual simulation with ExpertSim, use the following script: 65 | ``` 66 | python inference/expert_cfs.py --dir CAUSALSIM_DIR 67 | ``` 68 | For each choice of `target` from `[linear_bba, bola_basic_v1, bola_basic_v2]`, use the corresponding trained SLSim model 69 | for counterfactual simulation using the following script: 70 | ``` 71 | python inference/sl_subset_cfs.py --dir CAUSALSIM_DIR --left_out_policy target 72 | ``` 73 | For each choice of `target` from `[linear_bba, bola_basic_v1, bola_basic_v2]` and `Kappa` (loss hyper-parameter) from 74 | `[0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 40.0]`, use the corresponding CausalSim model to generate 75 | counterfactual buffer and SSIM trajectories with the following script: 76 | ``` 77 | python inference/buffer_subset_cfs.py --dir CAUSALSIM_DIR --left_out_policy target --C Kappa 78 | ``` 79 | For each choice of `target` from `[linear_bba, bola_basic_v1, bola_basic_v2]`, calculate the average SSIM using the 80 | ground-truth data, corresponding ExpertSim simulations, and corresponding SLSim simulations with the following scripts: 81 | ``` 82 | python analysis/original_subset_ssim.py --dir CAUSALSIM_DIR --left_out_policy target 83 | python analysis/sl_subset_ssim.py --dir CAUSALSIM_DIR --left_out_policy target 84 | ``` 85 | For each choice of `target` from `[linear_bba, bola_basic_v1, bola_basic_v2]` and `Kappa` (loss hyper-parameter) from 86 | `[0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 40.0]`, calculate the average SSIM using the corresponding 87 | CausalSim simulations with the following script: 88 | ``` 89 | python analysis/subset_ssim.py --dir CAUSALSIM_DIR --left_out_policy target --C Kappa 90 | ``` 91 | For each choice of `target` from `[linear_bba, bola_basic_v1, bola_basic_v2]` and `Kappa` (loss hyper-parameter) from 92 | `[0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 40.0]`, calculate the simulated buffer distribution's Earth 93 | Mover's Distance (EMD) using the ground-truth data and corresponding CausalSim, SLSim, and ExpertSim simulations with the 94 | following script (a sketch of the underlying 1-D EMD computation follows this block): 95 | ``` 96 | python analysis/subset_EMD.py --dir CAUSALSIM_DIR --left_out_policy target --C Kappa 97 | ```
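(A reading aid rather than the repository's code: the EMD between a simulated and a ground-truth buffer-occupancy distribution is a one-dimensional Wasserstein-1 distance, which SciPy computes directly. The arrays below are hypothetical stand-ins for the real trajectories.)

```python
import numpy as np
from scipy.stats import wasserstein_distance

# Hypothetical buffer-occupancy samples (seconds) from the two sources.
true_buffers = np.random.exponential(scale=5.0, size=10_000)  # ground truth
sim_buffers = np.random.exponential(scale=5.5, size=10_000)   # simulator output

# For 1-D samples, the EMD equals the Wasserstein-1 distance between
# the two empirical distributions.
emd = wasserstein_distance(true_buffers, sim_buffers)
print(f"buffer EMD: {emd:.3f} s")
```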
98 | To tune CausalSim's hyper-parameters for buffer and SSIM prediction, use the following script: 99 | ``` 100 | python analysis/tune_buffer_hyperparams.py --dir CAUSALSIM_DIR 101 | ``` 102 | For each choice of `target` from `[linear_bba, bola_basic_v1, bola_basic_v2]` and `Kappa` (loss hyper-parameter) from 103 | `[0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 40.0]`, use the corresponding CausalSim model to generate 104 | counterfactual download-time trajectories with the following script: 105 | ``` 106 | python inference/downloadtime_subset_cfs.py --dir CAUSALSIM_DIR --left_out_policy target --C Kappa 107 | ``` 108 | For each choice of `target` from `[linear_bba, bola_basic_v1, bola_basic_v2]`, calculate the average stall ratio using 109 | the ground-truth data, corresponding ExpertSim simulations, and corresponding SLSim simulations with the following 110 | scripts: 111 | ``` 112 | python analysis/original_subset_stall.py --dir CAUSALSIM_DIR --left_out_policy target 113 | python analysis/sl_subset_stall.py --dir CAUSALSIM_DIR --left_out_policy target 114 | ``` 115 | For each choice of `target` from `[linear_bba, bola_basic_v1, bola_basic_v2]` and `Kappa` (loss hyper-parameter) from 116 | `[0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 40.0]`, calculate the average stall ratio using the 117 | corresponding CausalSim simulations with the following script: 118 | ``` 119 | python analysis/subset_stall.py --dir CAUSALSIM_DIR --left_out_policy target --C Kappa 120 | ``` 121 | To tune CausalSim's hyper-parameters for download-time prediction, use the following script: 122 | ``` 123 | python analysis/tune_downloadtime_hyperparams.py --dir CAUSALSIM_DIR 124 | ``` 125 | --- 126 | ## 4. Result Visualization 127 | You can find scripts for generating the paper's plots in the [visualization folder](https://github.com/CausalSim/Unbiased-Trace-Driven-Simulation/tree/master/abr-puffer/visualization). -------------------------------------------------------------------------------- /abr-synthetic/env/abr.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from collections import deque 4 | from .trace_loader import load_traces 5 | from tqdm import tqdm 6 | 7 | 8 | class ABRSimEnv(object): 9 | MAX_BUFF_S = 10.0 10 | CHUNK_LENGTH = 4.0 11 | CHOICES = 6 12 | # mapping between action and bitrate level 13 | BITRATE_MAP = [0.3, 0.75, 1.2, 1.85, 2.85, 4.3] 14 | REBUF_PENALTY = 4.3 15 | 16 | def __init__(self, mpc_lookahead: int, mpc_lookback: int, seed: int, trace_folder: str, num_traces: int): 17 | # observation and action space 18 | self.mpc_lookahead = mpc_lookahead 19 | self.mpc_lookback = mpc_lookback 20 | self.delay_list = None 21 | self.chunk_idx = None 22 | self.buffer_size = None 23 | self.past_action = None 24 | self.past_chunk_throughputs = None 25 | self.past_chunk_download_times = None 26 | self.np_random = None 27 | self.obs_high = None 28 | self.obs_low = None 29 | self.observation_space = None 30 | self.action_space = None 31 | self.setup_space() 32 | # set up seed 33 | self.seed(seed) 34 | # load all video chunk sizes 35 | self.chunk_sizes = np.load(os.path.dirname(__file__) + '/video_sizes.npy').T 36 | # assert number of chunks for different bitrates are all the same 37 | self.total_num_chunks = len(self.chunk_sizes) 38 | # load all trace files 39 | self.all_traces, self.all_rtts = load_traces(trace_folder=trace_folder, seed=seed, 40 | length_trace=self.total_num_chunks, 41 | num_traces=num_traces) 42 | # how many past throughput samples to report 43 | self.past_chunk_len = max(mpc_lookahead, mpc_lookback) 44 | 45 | print('Precomputing all download times...') 46 | self.all_delays = np.array([thr_slow_start(trace, self.chunk_sizes, rtt) 47 | for trace, rtt in tqdm(zip(self.all_traces, self.all_rtts), total=num_traces)]) 48 | print('Finished') 49 | 50 | def observe(self): 51 | if self.chunk_idx < self.total_num_chunks: 52 | valid_chunk_idx = self.chunk_idx 53 | else: 54 | valid_chunk_idx = 0 55 | 56 | if self.past_action is not None: 57 | valid_past_action = self.past_action 58 | else: 59 | valid_past_action = 0 60 | 61 | # network throughput of past chunk, past chunk download time, 62 | # current buffer, number of chunks left and the last bitrate choice 63 | obs_arr = [self.past_chunk_throughputs[-i] for i in range(self.mpc_lookback, 0,
-1)] 64 | obs_arr.extend([self.past_chunk_download_times[-i] for i in range(self.mpc_lookback, 0, -1)]) 65 | obs_arr.extend([self.buffer_size, self.total_num_chunks - self.chunk_idx, valid_past_action]) 66 | 67 | # current chunk size of different bitrates 68 | for chunk_idx_add in range(valid_chunk_idx, self.mpc_lookahead+valid_chunk_idx): 69 | obs_arr.extend(self.chunk_sizes[chunk_idx_add % self.total_num_chunks, i] for i in range(self.CHOICES)) 70 | 71 | for i in range(self.CHOICES): 72 | obs_arr.append(self.chunk_sizes[valid_chunk_idx, i] / self.delay_list[valid_chunk_idx, i]) 73 | for i in range(self.CHOICES): 74 | obs_arr.append(self.delay_list[valid_chunk_idx, i]) 75 | 76 | obs_arr = np.array(obs_arr) 77 | assert np.all(obs_arr >= self.obs_low), obs_arr 78 | assert np.all(obs_arr <= self.obs_high), obs_arr 79 | 80 | return obs_arr 81 | 82 | def reset(self, trace_choice=None): 83 | assert trace_choice is not None and trace_choice < len(self.all_traces) 84 | self.delay_list = self.all_delays[trace_choice] 85 | self.chunk_idx = 0 86 | self.buffer_size = 0.0 # initial download time not counted 87 | self.past_action = None 88 | self.past_chunk_throughputs = deque(maxlen=self.past_chunk_len) 89 | self.past_chunk_download_times = deque(maxlen=self.past_chunk_len) 90 | for _ in range(self.past_chunk_len): 91 | self.past_chunk_throughputs.append(0) 92 | self.past_chunk_download_times.append(0) 93 | 94 | return self.observe() 95 | 96 | def seed(self, seed): 97 | self.np_random = np.random.RandomState(seed) 98 | 99 | def setup_space(self): 100 | # Set up the observation and action space 101 | self.obs_low = np.array([0] * (3 + 2 * self.mpc_lookback + 6 * self.mpc_lookahead + 12)) 102 | self.obs_high = np.array([100e6] * self.mpc_lookback + [5000] * self.mpc_lookback + [100, 500, 5] + 103 | [10e6] * (6*self.mpc_lookahead) + [100e6] * 6 + [5000] * 6) 104 | 105 | def step(self, action): 106 | # 0 <= action < num_bitrates 107 | assert 0 <= action < self.CHOICES 108 | 109 | # Note: sizes are in bytes, times are in seconds 110 | chunk_size = self.chunk_sizes[self.chunk_idx, action] 111 | 112 | # compute chunk download time based on trace 113 | delay = self.delay_list[self.chunk_idx, action] 114 | 115 | # compute buffer size 116 | rebuffer_time = max(delay - self.buffer_size, 0) 117 | 118 | # update video buffer 119 | self.buffer_size = max(self.buffer_size - delay, 0) 120 | self.buffer_size += self.CHUNK_LENGTH # each chunk is 4 seconds of video 121 | 122 | # cap the buffer size 123 | self.buffer_size = min(self.buffer_size, self.MAX_BUFF_S) 124 | 125 | # bitrate change penalty 126 | if self.past_action is None: 127 | bitrate_change = 0 128 | else: 129 | bitrate_change = np.abs(self.BITRATE_MAP[action] - self.BITRATE_MAP[self.past_action]) 130 | 131 | # linear reward 132 | # (https://dl.acm.org/citation.cfm?id=3098843 section 5.1, QoE metrics (1)) 133 | reward = self.BITRATE_MAP[action] - self.REBUF_PENALTY * rebuffer_time - bitrate_change 134 | 135 | # store action for future bitrate change penalty 136 | self.past_action = action 137 | 138 | # update observed network bandwidth and duration 139 | self.past_chunk_throughputs.append(chunk_size / float(delay)) 140 | self.past_chunk_download_times.append(delay) 141 | 142 | # advance video 143 | self.chunk_idx += 1 144 | done = (self.chunk_idx == self.total_num_chunks) 145 | 146 | return self.observe(), reward, done, \ 147 | {'bitrate': self.BITRATE_MAP[action], 148 | 'stall_time': rebuffer_time, 149 | 'bitrate_change': bitrate_change} 150 | 151 |
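(A minimal rollout sketch, ours rather than the repo's: it exercises the `reset`/`step` interface above with a trivial fixed-bitrate policy. The constructor arguments, in particular `trace_folder`, are hypothetical.)

```python
from env.abr import ABRSimEnv

# Hypothetical setup; trace_folder must contain traces readable by load_traces.
env = ABRSimEnv(mpc_lookahead=5, mpc_lookback=5, seed=42,
                trace_folder='traces/', num_traces=10)

obs = env.reset(trace_choice=0)
done, total_reward = False, 0.0
while not done:
    act = 0  # stand-in policy: always pick the lowest bitrate
    obs, reward, done, info = env.step(act)
    total_reward += reward
print('episode reward:', total_reward)
```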
152 | def thr_slow_start(trace: np.ndarray, chunk_sizes: np.ndarray, rtt: float, thr_start: float = 2*1500) -> np.ndarray: 153 | delays = np.empty_like(chunk_sizes, dtype=float) 154 | # thr_start: bytes/second, Two packets, MTU = 1500 bytes 155 | thr_end = trace / 8.0 * 1e6 # bytes/second 156 | len_thr_exp_arr = np.ceil(np.log2(thr_end / thr_start)).astype(int) 157 | assert np.all(len_thr_exp_arr > 0) 158 | for i in range(delays.shape[0]): 159 | thr_arr = np.exp2(np.arange(len_thr_exp_arr[i]+1)) * thr_start 160 | thr_arr[-1] = thr_end[i] 161 | time_arr = np.ones(len_thr_exp_arr[i]) * rtt / 1000 162 | cumul_sum_thr = np.cumsum(thr_arr[:-1] * time_arr) 163 | for j, chunk in enumerate(chunk_sizes[i]): 164 | index_start = np.where(cumul_sum_thr > chunk)[0] 165 | index_start = len(thr_arr) - 1 if len(index_start) == 0 else index_start[0] 166 | time_first = 0 if index_start == 0 else rtt / 1000 * index_start 167 | size_first = 0 if index_start == 0 else cumul_sum_thr[index_start - 1] 168 | delays[i, j] = time_first + (chunk - size_first) / thr_arr[index_start] 169 | return delays 170 | -------------------------------------------------------------------------------- /abr-synthetic/generate_traces.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from abc import ABC, abstractmethod 3 | from tqdm import tqdm 4 | import torch 5 | 6 | 7 | def collect_traces_causalsim( 8 | pols, 9 | c_hat_observed, 10 | actions_observed, 11 | chunk_sizes, 12 | min_rtt, 13 | feature_extractor, 14 | buffer_predictor, 15 | action_factor_net, 16 | r, 17 | buf_mean, 18 | buf_std, 19 | next_buf_mean, 20 | next_buf_std, 21 | chat_mean, 22 | chat_std, 23 | size_mean, 24 | size_std, 25 | min_rtts_mean, 26 | min_rtts_std, 27 | down_time_mean, 28 | down_time_std, 29 | ): 30 | 31 | # Shared rollout buffers (reused across policies) 32 | length = c_hat_observed.shape[0] 33 | observation_array = np.zeros( 34 | [length + 1, 55] 35 | ) # 0-4 thpt, 5-9 download time, 10 buffer, 11 chunks left, 12 last action, 13-42 chunk sizes 36 | observation_array[:, 13:43] = chunk_sizes 37 | observation_array[:, 11] = np.arange(1, length + 2)[::-1] 38 | observation_array[:, 43:] = np.nan 39 | cf_trajs = np.zeros([len(pols), length + 1, 3]) 40 | feature = np.zeros([length]) 41 | for i_pol, policy in enumerate(pols): 42 | observation_array[:, :11] = 0 43 | observation_array[:, 12] = 0 44 | for i in range(length): 45 | # Choose action 46 | act = policy.take_action(observation_array[i, :]) 47 | 48 | ########### Use the counterfactual model to predict buffer level and download time 49 | chat = c_hat_observed[i] 50 | orig_chosen_size = observation_array[i, 13 + actions_observed[i]] 51 | chat_white = (chat - chat_mean) / chat_std 52 | selected_size = observation_array[i, 13 + act] 53 | selected_size_white = (selected_size - size_mean) / size_std 54 | orig_chosen_size_white = (orig_chosen_size - size_mean) / size_std 55 | buffer = observation_array[i, 10] 56 | buffer_white = (buffer - buf_mean) / buf_std 57 | min_rtt_white = (min_rtt - min_rtts_mean) / min_rtts_std 58 | 59 | input_numpy = np.array( 60 | [orig_chosen_size_white, min_rtt_white, chat_white] 61 | ) # Model only accepts normalized inputs 62 | input_numpy = np.expand_dims(input_numpy, axis=0) 63 | input_tensor = torch.as_tensor( 64 | input_numpy, dtype=torch.float32, device=torch.device("cpu") 65 | ) 66 | with torch.no_grad(): 67 | feature_tensor = feature_extractor(input_tensor) 68 | feature[i] = feature_tensor.cpu().numpy()[0][0] 69 | action_factor = action_factor_net( 70 | torch.tensor([selected_size_white], dtype=torch.float32) 71 | ) 72 | predicted_thpt =
torch.mul(feature_tensor, action_factor) 73 | predicted_thpt = torch.matmul( 74 | predicted_thpt, torch.ones([r, 1], dtype=torch.float32) 75 | ) 76 | 77 | input_numpy = np.array([buffer_white, selected_size_white, min_rtt_white]) 78 | input_numpy = np.expand_dims(input_numpy, axis=0) 79 | input_tensor = torch.as_tensor( 80 | input_numpy, dtype=torch.float32, device=torch.device("cpu") 81 | ) 82 | input_tensor = torch.cat((input_tensor, predicted_thpt), dim=1) 83 | with torch.no_grad(): 84 | prediction = buffer_predictor(input_tensor) 85 | next_buffer_white_tensor, down_time_white_tensor = ( 86 | prediction[0, 0], 87 | prediction[0, 1], 88 | ) 89 | next_buffer_white = next_buffer_white_tensor.cpu().numpy() 90 | down_time_white = down_time_white_tensor.cpu().numpy() 91 | download_time = (down_time_white * down_time_std) + down_time_mean 92 | next_buffer = (next_buffer_white * next_buf_std) + next_buf_mean 93 | 94 | # update observation_array 95 | observation_array[i + 1, 10] = next_buffer 96 | observation_array[i + 1, 12] = act 97 | observation_array[i + 1, 0:4] = observation_array[i, 1:5] 98 | observation_array[i + 1, 4] = selected_size / download_time 99 | observation_array[i + 1, 5:9] = observation_array[i, 6:10] 100 | observation_array[i + 1, 9] = download_time 101 | 102 | # Save results 103 | # NOTE: download time and next buffer are stored one index later, as is the case with original data 104 | cf_trajs[i_pol, i + 1, 0] = next_buffer 105 | cf_trajs[i_pol, i + 1, 1] = download_time 106 | cf_trajs[i_pol, i + 1, 2] = act 107 | act = policy.take_action(observation_array[i + 1, :]) 108 | 109 | return ( 110 | cf_trajs, 111 | feature, 112 | ) 113 | 114 | 115 | def collect_traces_slsim( 116 | pols, 117 | c_hat_observed, 118 | chunk_sizes, 119 | min_rtt, 120 | buffer_predictor, 121 | buf_mean, 122 | buf_std, 123 | next_buf_mean, 124 | next_buf_std, 125 | chat_mean, 126 | chat_std, 127 | size_mean, 128 | size_std, 129 | min_rtts_mean, 130 | min_rtts_std, 131 | down_time_mean, 132 | down_time_std, 133 | ): 134 | 135 | # Load policies 136 | length = c_hat_observed.shape[0] 137 | observation_array = np.zeros( 138 | [length + 1, 55] 139 | ) # 0-4 thpt, 5-9 download time, 10 buffer, 12 last action, 140 | observation_array[:, 13:43] = chunk_sizes 141 | observation_array[:, 11] = np.arange(1, length + 2)[::-1] 142 | observation_array[:, 43:] = np.nan 143 | cf_trajs = np.zeros([len(pols), length + 1, 3]) 144 | for i_pol, policy in enumerate(pols): 145 | observation_array[:, :11] = 0 146 | observation_array[:, 12] = 0 147 | for i in range(length): 148 | # Choose action 149 | act = policy.take_action(observation_array[i, :]) 150 | 151 | ########### Use the counterfactual model to predict buffer level and download time 152 | chat = c_hat_observed[i] 153 | chat_white = (chat - chat_mean) / chat_std 154 | selected_size = observation_array[i, 13 + act] 155 | selected_size_white = (selected_size - size_mean) / size_std 156 | buffer = observation_array[i, 10] 157 | buffer_white = (buffer - buf_mean) / buf_std 158 | min_rtt_white = (min_rtt - min_rtts_mean) / min_rtts_std 159 | input_numpy = np.array( 160 | [buffer_white, selected_size_white, min_rtt_white, chat_white] 161 | ) 162 | input_numpy = np.expand_dims(input_numpy, axis=0) 163 | input_tensor = torch.as_tensor( 164 | input_numpy, dtype=torch.float32, device=torch.device("cpu") 165 | ) 166 | with torch.no_grad(): 167 | prediction = buffer_predictor(input_tensor) 168 | next_buffer_white_tensor, down_time_white_tensor = ( 169 | prediction[0, 0], 170 | 
prediction[0, 1], 171 | ) 172 | next_buffer_white = next_buffer_white_tensor.cpu().numpy() 173 | down_time_white = down_time_white_tensor.cpu().numpy() 174 | download_time = (down_time_white * down_time_std) + down_time_mean 175 | next_buffer = (next_buffer_white * next_buf_std) + next_buf_mean 176 | 177 | # update observation_array 178 | observation_array[i + 1, 10] = next_buffer 179 | observation_array[i + 1, 12] = act 180 | observation_array[i + 1, 0:4] = observation_array[i, 1:5] 181 | observation_array[i + 1, 4] = selected_size / download_time 182 | observation_array[i + 1, 5:9] = observation_array[i, 6:10] 183 | observation_array[i + 1, 9] = download_time 184 | 185 | # Save results 186 | # NOTE: download time and next buffer are stored one index later, as is the case with original data 187 | cf_trajs[i_pol, i + 1, 0] = next_buffer 188 | cf_trajs[i_pol, i + 1, 1] = download_time 189 | cf_trajs[i_pol, i + 1, 2] = act 190 | act = policy.take_action(observation_array[i + 1, :]) 191 | 192 | return cf_trajs 193 | -------------------------------------------------------------------------------- /load-balancing/latency_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | import torch 4 | 5 | from create_dataset import ( 6 | PowerofKPolicy, 7 | OptimalPolicy, 8 | RandomPolicy, 9 | ShortestQueuePolicy, 10 | JobScheduler, 11 | ProcessTimeManager, 12 | TrackerPolicy, 13 | ) 14 | 15 | 16 | def collect_traces_sim_traj_fact( 17 | job_sizes, 18 | inter_arrs, 19 | ptimes_obs, 20 | actions_obs, 21 | feature_extractor, 22 | action_factor_net, 23 | r, 24 | pt_mean, 25 | pt_std, 26 | test_pol_idx, 27 | p_change=0, 28 | ): 29 | ns = 8 30 | seed = 43 31 | load_target = 0.6 32 | # p_change should be 0 if '_0' and 0.5 if '_50' 33 | assert job_sizes.ndim == 2 34 | assert job_sizes.shape == inter_arrs.shape 35 | assert ptimes_obs.shape == actions_obs.shape 36 | assert ptimes_obs.shape[:] == job_sizes.shape 37 | 38 | no_traj, length = job_sizes.shape 39 | 40 | # Load info arrays 41 | time_jobs = np.cumsum(inter_arrs, axis=-1) 42 | actions = np.empty((1, no_traj, length), dtype=int) 43 | proc_times = np.empty((1, no_traj, length), dtype=float) 44 | latencies = np.empty((1, no_traj, length), dtype=float) 45 | feature = np.empty((no_traj, length), dtype=float) 46 | max_action = 7 47 | env = JobScheduler(ns) 48 | pt_mgr = ProcessTimeManager(ns, seed, p_change) 49 | # Load policies 50 | pols = [ 51 | RandomPolicy(seed, ns), 52 | ShortestQueuePolicy(seed, ns), 53 | PowerofKPolicy(seed, ns, 2), 54 | PowerofKPolicy(seed, ns, 3), 55 | PowerofKPolicy(seed, ns, 4), 56 | PowerofKPolicy(seed, ns, 5), 57 | OptimalPolicy(seed, ns), 58 | TrackerPolicy(seed, ns, 0.995), 59 | ] 60 | for i_pol, policy in enumerate(pols[test_pol_idx : test_pol_idx + 1]): 61 | # Reset environment 62 | # Load rate manager 63 | pt_mgr = ProcessTimeManager(ns, seed, p_change) 64 | # Register rate manager for optimal policy 65 | policy.register(pt_manager=pt_mgr) 66 | for index in tqdm(range(no_traj)): 67 | obs = env.reset() 68 | for i in range(length): 69 | # Choose server 70 | act = policy.act(obs) 71 | # Calculate processing time 72 | pt_o = ptimes_obs[index, i] 73 | orig_action = int(actions_obs[index, i]) 74 | # get one-hot encoder 75 | orig_action_oh = np.zeros(max_action + 1) 76 | orig_action_oh[orig_action] = 1 77 | 78 | pt_o_white = (pt_o - pt_mean) / pt_std 79 | 80 | action = int(act) 81 | # get one-hot encoder for action 82 | action_oh = 
np.zeros(max_action + 1) 83 | action_oh[action] = 1 84 | 85 | input_numpy = np.array( 86 | [pt_o_white] + list(orig_action_oh) 87 | ) # Model only accepts normalized inputs 88 | input_numpy = np.expand_dims(input_numpy, axis=0) 89 | input_tensor = torch.as_tensor( 90 | input_numpy, dtype=torch.float32, device=torch.device("cpu") 91 | ) 92 | with torch.no_grad(): 93 | feature_tensor = feature_extractor(input_tensor) 94 | input_numpy = np.array(action_oh) 95 | input_numpy = np.expand_dims(input_numpy, axis=0) 96 | input_tensor = torch.as_tensor( 97 | input_numpy, dtype=torch.float32, device=torch.device("cpu") 98 | ) 99 | action_factor = action_factor_net(input_tensor) 100 | 101 | cf_processing_time_white = torch.mul(feature_tensor, action_factor) 102 | cf_processing_time_white = torch.matmul( 103 | cf_processing_time_white, torch.ones([r, 1], dtype=torch.float32) 104 | ) 105 | 106 | cf_processing_time_white = ( 107 | cf_processing_time_white.cpu().detach().numpy()[0][0] 108 | ) 109 | ptime = (cf_processing_time_white * pt_std) + pt_mean 110 | 111 | # ########## Use the counterfactual model to predict ptime 112 | # Submit processing time for tracker policy 113 | policy.submit(act, ptime) 114 | # Receive latency and queue sizes 115 | latency, obs = env.step(ptime, act, inter_arrs[index, i]) 116 | # Save info 117 | proc_times[i_pol, index, i] = ptime 118 | feature[index, i] = feature_tensor.detach().cpu().numpy()[0] 119 | actions[i_pol, index, i] = act 120 | latencies[i_pol, index, i] = latency 121 | 122 | return feature, actions, proc_times, latencies 123 | 124 | 125 | def collect_traces_direct_traj( 126 | job_sizes, 127 | inter_arrs, 128 | ptimes_obs, 129 | buffer_predictor, 130 | pt_mean, 131 | pt_std, 132 | test_pol_idx, 133 | p_change=0, 134 | ): 135 | ns = 8 136 | seed = 43 137 | load_target = 0.6 138 | # p_change should be 0 if '_0' and 0.5 if '_50' 139 | assert job_sizes.ndim == 2 140 | assert job_sizes.shape == inter_arrs.shape 141 | assert ptimes_obs.shape[:] == job_sizes.shape 142 | 143 | no_traj, length = job_sizes.shape 144 | 145 | # Load policies 146 | pols = [ 147 | RandomPolicy(seed, ns), 148 | ShortestQueuePolicy(seed, ns), 149 | PowerofKPolicy(seed, ns, 2), 150 | PowerofKPolicy(seed, ns, 3), 151 | PowerofKPolicy(seed, ns, 4), 152 | PowerofKPolicy(seed, ns, 5), 153 | OptimalPolicy(seed, ns), 154 | TrackerPolicy(seed, ns, 0.995), 155 | ] 156 | if test_pol_idx is not None: 157 | pols = pols[test_pol_idx : test_pol_idx + 1] 158 | p_out = 1 159 | else: 160 | p_out = len(pols) 161 | # Load info arrays 162 | time_jobs = np.cumsum(inter_arrs, axis=-1) 163 | actions = np.empty((p_out, no_traj, length), dtype=int) 164 | proc_times = np.empty((p_out, no_traj, length), dtype=float) 165 | latencies = np.empty((p_out, no_traj, length), dtype=float) 166 | feature = np.empty((no_traj, length), dtype=float) 167 | max_action = 7 168 | env = JobScheduler(ns) 169 | pt_mgr = ProcessTimeManager(ns, seed, p_change) 170 | for i_pol, policy in enumerate(pols): 171 | # Reset environment 172 | # Load rate manager 173 | pt_mgr = ProcessTimeManager(ns, seed, p_change) 174 | # Register rate manager for optimal policy 175 | policy.register(pt_manager=pt_mgr) 176 | for index in tqdm(range(no_traj)): 177 | obs = env.reset() 178 | for i in range(length): 179 | # Choose server 180 | act = policy.act(obs) 181 | # Calculate processing time 182 | pt_o = ptimes_obs[index, i] 183 | pt_o_white = (pt_o - pt_mean) / pt_std 184 | 185 | action = int(act) 186 | # get one-hot encoder for action 187 | input_numpy = 
np.zeros(max_action + 2) 188 | input_numpy[action + 1] = 1 189 | input_numpy[0] = pt_o_white 190 | input_numpy = np.array([input_numpy]) 191 | input_tensor = torch.as_tensor( 192 | input_numpy, 193 | dtype=torch.float32, 194 | device=torch.device("cpu"), 195 | ) 196 | with torch.no_grad(): 197 | cf_processing_time_white = buffer_predictor(input_tensor) 198 | cf_processing_time_white = cf_processing_time_white.cpu().numpy()[0][0] 199 | ptime = (cf_processing_time_white * pt_std) + pt_mean 200 | 201 | ########### Use the counterfactual model to predict ptime 202 | # Submit processing time for tracker policy 203 | policy.submit(act, ptime) 204 | # Receive latency and queue sizes 205 | latency, obs = env.step(ptime, act, inter_arrs[index, i]) 206 | # Save info 207 | proc_times[i_pol, index, i] = ptime 208 | actions[i_pol, index, i] = act 209 | latencies[i_pol, index, i] = latency 210 | 211 | return actions, proc_times, latencies 212 | -------------------------------------------------------------------------------- /abr-synthetic/causalsim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.utils.tensorboard import SummaryWriter 5 | import os 6 | from tqdm import tqdm 7 | from sklearn.model_selection import train_test_split 8 | from nn_util import MLP 9 | 10 | BATCH_SIZE = 2**13 # Maximum without exceeding GPU memory limit 11 | 12 | 13 | def train_causal_sim( 14 | datapath, 15 | kappa, 16 | no_policies, 17 | r=2, 18 | DISCRIMINATOR_EPOCH=10, 19 | models_path="models", 20 | BATCH_SIZE=2**13, 21 | N=int(5 * 1e6), 22 | ): 23 | path_models = f"{models_path}/causalsim/" 24 | 25 | try: 26 | os.makedirs(path_models) 27 | except FileExistsError: 28 | pass 29 | log_path = f"{path_models}/logs" 30 | try: 31 | os.makedirs(log_path) 32 | except FileExistsError: 33 | pass 34 | 35 | if torch.cuda.is_available(): 36 | device = torch.device("cuda:1") 37 | else: 38 | device = torch.device("cpu") 39 | inputs_train = np.load(f"{datapath}/white_train_inputs_synthetic.npy") 40 | # [:, (buffer, chosen_chunk_size, min_rtt, c_hat)] 41 | 42 | outputs_train = np.load(f"{datapath}/white_train_outputs_synthetic.npy") 43 | # [:, (next_buffer, download_time, policy_label)] 44 | 45 | val_size = int(inputs_train.shape[0] * 0.15) 46 | train_idx, val_idx = train_test_split( 47 | np.arange(len(inputs_train)), test_size=val_size, train_size=None 48 | ) 49 | 50 | train_input_tensors = torch.as_tensor( 51 | inputs_train[train_idx], dtype=torch.float32, device=device  # train split only; the original indexed the full array here, leaking val rows 52 | ) 53 | train_output_tensors = torch.as_tensor( 54 | outputs_train[train_idx], dtype=torch.float32, device=device 55 | ) 56 | 57 | val_input_tensors = torch.as_tensor( 58 | inputs_train[val_idx], dtype=torch.float32, device=device 59 | ) 60 | val_output_tensors = torch.as_tensor( 61 | outputs_train[val_idx], dtype=torch.float32, device=device 62 | ) 63 | 64 | # init networks 65 | feature_extractor = MLP( 66 | input_dim=3, output_dim=r, hidden_sizes=[128, 128], activation=nn.ReLU 67 | ).to(device) 68 | action_factor_net = MLP( 69 | input_dim=1, output_dim=r, hidden_sizes=[64, 64], activation=nn.ReLU 70 | ).to(device) 71 | buffer_predictor = MLP( 72 | input_dim=4, output_dim=2, hidden_sizes=[128, 128], activation=nn.ReLU 73 | ).to(device) 74 | discriminator = MLP( 75 | input_dim=r, output_dim=no_policies, hidden_sizes=[128, 128], activation=nn.ReLU 76 | ).to(device) 77 | # init losses 78 | mse_loss = nn.MSELoss() 79 | ce_loss = nn.CrossEntropyLoss() 80 | 81 | # init optimizers 82 |
action_factor_optimizer = torch.optim.Adam(action_factor_net.parameters(), lr=1e-3) 83 | feature_extractor_optimizer = torch.optim.Adam( 84 | feature_extractor.parameters(), lr=1e-3 85 | ) 86 | buffer_predictor_optimizer = torch.optim.Adam( 87 | buffer_predictor.parameters(), lr=1e-3 88 | ) 89 | discriminator_optimizer = torch.optim.Adam(discriminator.parameters(), lr=1e-3) 90 | 91 | writer_train = SummaryWriter( 92 | log_dir=f"{log_path}/inner_loop_%d/kappa_%d/training" 93 | % (DISCRIMINATOR_EPOCH, kappa) 94 | ) 95 | best_loss = np.inf 96 | 97 | for epoch in tqdm(range(10000)): 98 | # Discriminator inner training loop: 99 | train_loss_list = [] 100 | for discriminator_epoch in range(DISCRIMINATOR_EPOCH + 1): 101 | idx = np.random.choice(np.arange(len(train_input_tensors)), size=BATCH_SIZE) 102 | batch_output_tensors = train_output_tensors[idx, 2] 103 | batch_input_tensors = train_input_tensors[idx, 1:] 104 | discriminator_optimizer.zero_grad() 105 | feature_tensors = feature_extractor(batch_input_tensors) 106 | discriminated_tensors = discriminator(feature_tensors) 107 | discriminator_loss = ce_loss( 108 | discriminated_tensors, batch_output_tensors.long() 109 | ) 110 | discriminator_loss.backward() 111 | discriminator_optimizer.step() 112 | train_loss_list.append(discriminator_loss.cpu().detach().numpy()) 113 | 114 | writer_train.add_scalar("discriminator_loss", min(train_loss_list), epoch) 115 | 116 | # extractor training: 117 | idx = np.random.choice(np.arange(len(train_input_tensors)), size=BATCH_SIZE) 118 | batch_input_tensors = train_input_tensors[idx] 119 | batch_output_tensors = train_output_tensors[idx] 120 | feature_extractor_optimizer.zero_grad() 121 | action_factor_optimizer.zero_grad() 122 | 123 | action_factor = action_factor_net(batch_input_tensors[:, 1:2]) 124 | feature_tensors = feature_extractor(batch_input_tensors[:, 1:]) 125 | 126 | predicted_thpt = torch.mul(feature_tensors, action_factor) 127 | predicted_thpt = torch.matmul( 128 | predicted_thpt, torch.ones([r, 1], dtype=torch.float32, device=device) 129 | ) 130 | 131 | discriminated_tensors = discriminator(feature_tensors) 132 | pred_loss = mse_loss(predicted_thpt, batch_input_tensors[:, 3:]) 133 | fool_loss = ce_loss(discriminated_tensors, batch_output_tensors[:, 2].long()) 134 | total_loss = pred_loss - kappa * fool_loss 135 | writer_train.add_scalar( 136 | "predictor_loss/prediction", pred_loss.cpu().detach().numpy(), epoch 137 | ) 138 | writer_train.add_scalar( 139 | "predictor_loss/discriminator", fool_loss.cpu().detach().numpy(), epoch 140 | ) 141 | writer_train.add_scalar( 142 | "predictor_loss/total", total_loss.cpu().detach().numpy(), epoch 143 | ) 144 | total_loss.backward() 145 | action_factor_optimizer.step() 146 | feature_extractor_optimizer.step() 147 | 148 | if epoch % 1000 == 999: 149 | batch_input_tensors = val_input_tensors[:] 150 | batch_output_tensors = val_output_tensors[:] 151 | 152 | action_factor = action_factor_net(batch_input_tensors[:, 1:2]) 153 | feature_tensors = feature_extractor(batch_input_tensors[:, 1:]) 154 | discriminated_tensors = discriminator(feature_tensors) 155 | 156 | predicted_pt = torch.mul(feature_tensors, action_factor) 157 | predicted_pt = torch.matmul( 158 | predicted_pt, torch.ones([r, 1], dtype=torch.float32, device=device) 159 | ) 160 | pred_loss = mse_loss(predicted_pt, batch_input_tensors[:, 3:]) 161 | fool_loss = ce_loss( 162 | discriminated_tensors, batch_output_tensors[:, 2].long() 163 | ) 164 | total_loss = pred_loss - kappa * fool_loss 165 | 166 | print( 167 | 
f"Val loss: epoch {epoch}, prediction loss {total_loss.cpu().detach().numpy()}, disc_loss {fool_loss.cpu().detach().numpy()} " 168 | ) 169 | if best_loss > total_loss: 170 | best_loss = total_loss 171 | print(f"saving ... best losses: {best_loss}") 172 | torch.save( 173 | feature_extractor, 174 | f"{path_models}/best_feature_extractor.pth", 175 | ) 176 | torch.save( 177 | action_factor_net, f"{path_models}/best_action_factor" ".pth" 178 | ) 179 | torch.save(discriminator, f"{path_models}/best_discriminator.pth") 180 | 181 | # train predictor 182 | for epoch in tqdm(range(20000)): 183 | idx = np.random.choice(np.arange(len(train_input_tensors)), size=BATCH_SIZE) 184 | batch_input_tensors = train_input_tensors[idx] 185 | batch_output_tensors = train_output_tensors[idx] 186 | buffer_predictor_optimizer.zero_grad() 187 | action_factor = action_factor_net(batch_input_tensors[:, 1:2]) 188 | feature_tensors = feature_extractor(batch_input_tensors[:, 1:]) 189 | predicted_thpt = torch.mul(feature_tensors, action_factor) 190 | predicted_thpt = torch.matmul( 191 | predicted_thpt, torch.ones([r, 1], dtype=torch.float32, device=device) 192 | ) 193 | pred_obs_tensors = buffer_predictor( 194 | torch.cat((batch_input_tensors[:, :-1], predicted_thpt), dim=1) 195 | ) 196 | pred_loss = mse_loss(pred_obs_tensors, batch_output_tensors[:, :2]) 197 | writer_train.add_scalar( 198 | "predictor_loss/buffer_predictor", pred_loss.cpu().detach().numpy(), epoch 199 | ) 200 | pred_loss.backward() 201 | buffer_predictor_optimizer.step() 202 | if epoch % 1000 == 999: 203 | torch.save( 204 | buffer_predictor, 205 | f"{path_models}/best_buffer_predictor.pth", 206 | ) 207 | 208 | print(f" prediction loss {pred_loss.cpu().detach().numpy()}") 209 | --------------------------------------------------------------------------------