├── .DS_Store ├── .ipynb_checkpoints └── prepare_plot_data-checkpoint.ipynb ├── cvar ├── .DS_Store ├── .ipynb_checkpoints │ └── speed_cvar-checkpoint.ipynb ├── Plotting_cvar.Rmd ├── Plotting_cvar.html ├── __pycache__ │ ├── tree.cpython-36.pyc │ └── tree.cpython-38.pyc ├── cvar_tree_utilities.py ├── experiment_cvar_lognormal.py ├── experiment_cvar_lognormal_honesty.py ├── experiment_cvar_lognormal_objcoef.py ├── experiment_cvar_lognormal_oracle.py ├── experiment_cvar_normal.py ├── experiment_cvar_normal_oracle.py ├── feature_imp_cvar_lognormal.csv ├── feature_imp_cvar_lognormal.pkl ├── feature_split_cvar_lognormal.csv ├── feature_split_cvar_lognormal.pkl ├── gurobi.log ├── regression_tree.py ├── risk_cvar_lognormal.pkl ├── risk_cvar_lognormal_honesty.pkl ├── risk_cvar_lognormal_objcoef.pkl ├── risk_cvar_lognormal_oracle.pkl ├── risk_cvar_lognormal_updatestep.pkl ├── risk_cvar_normal.pkl ├── risk_cvar_normal_oracle.pkl ├── risk_lognormal.csv ├── risk_lognormal_honesty.csv ├── risk_lognormal_objcoef.csv ├── risk_lognormal_oracle.csv ├── risk_normal.csv ├── risk_normal_oracle.csv ├── speed_cvar.ipynb ├── time_cvar.pkl └── tree.py ├── mean_var ├── .DS_Store ├── .ipynb_checkpoints │ └── speed_meanvar-checkpoint.ipynb ├── Plotting_meanvar.Rmd ├── Plotting_var.Rmd ├── __pycache__ │ ├── tree.cpython-36.pyc │ └── tree.cpython-38.pyc ├── abs_risk_full.csv ├── cond_violation_full.csv ├── cond_violation_meanvar_normal_stoch.pkl ├── experiment_meanvar_stoch.py ├── experiment_meanvar_stoch_R.py ├── experiment_meanvar_stoch_oracle.py ├── experiment_var_normal.py ├── experiment_var_normal_oracle.py ├── feature_freq_full.csv ├── feature_freq_full_oracle.csv ├── feature_split_meanvar_normal_stoch.pkl ├── feature_split_meanvar_normal_stoch_oracle.pkl ├── feature_split_var_normal_oracle.csv ├── feature_split_var_normal_oracle.pkl ├── marginal_violation_full.csv ├── mean_violation_meanvar_normal_stoch.pkl ├── meanvar_tree_utilities.py ├── regression_tree.py ├── rel_risk_full.csv ├── 
rel_risk_full_R.csv ├── rel_risk_full_oracle.csv ├── rel_risk_meanvar_normal_stoch.pkl ├── rel_risk_meanvar_normal_stoch_R.pkl ├── rel_risk_meanvar_normal_stoch_oracle.pkl ├── risk_meanvar_normal_stoch.pkl ├── risk_meanvar_normal_stoch_oracle.pkl ├── risk_var_normal.csv ├── risk_var_normal.pkl ├── risk_var_normal_oracle.csv ├── risk_var_normal_oracle.pkl ├── speed_meanvar.ipynb ├── time_meanvar.pkl └── tree.py ├── newsvendor ├── .DS_Store ├── Plotting_newsvendor.Rmd ├── experiment_nv_highdim.py ├── experiment_nv_honesty.py ├── experiment_nv_n.py ├── experiment_nv_p.py ├── feature_importance_n.csv ├── feature_importance_n.pkl ├── feature_split_n.pkl ├── feature_split_nv_n.csv ├── feature_split_p.pkl ├── nv_tree_utilities.py ├── risk_highdim.pkl ├── risk_n.pkl ├── risk_nv_highdim.csv ├── risk_nv_honesty.csv ├── risk_nv_honesty.pkl ├── risk_nv_n.csv ├── risk_nv_p.csv ├── risk_p.pkl └── tree.py ├── prepare_plot_data.ipynb ├── readme.md └── uber ├── .DS_Store ├── A_downtwon_1221to1256.csv ├── Plotting_uber.Rmd ├── X_halfyear.csv ├── X_onehalfyear.csv ├── X_oneyear.csv ├── X_twoyear.csv ├── Y_halfyear.csv ├── Y_onehalfyear.csv ├── Y_oneyear.csv ├── Y_twoyear.csv ├── b_downtwon_1221to1256.csv ├── cvar_tree_utilities.py ├── data ├── origin1220_des1223_sd2018-01-01_ed2018-03-31.csv ├── origin1220_des1223_sd2018-04-01_ed2018-06-30.csv ├── origin1220_des1223_sd2018-07-01_ed2018-09-30.csv ├── origin1220_des1223_sd2018-10-01_ed2018-12-31.csv ├── origin1220_des1223_sd2019-01-01_ed2019-03-31.csv ├── origin1220_des1223_sd2019-04-01_ed2019-06-30.csv ├── origin1220_des1223_sd2019-07-01_ed2019-09-30.csv ├── origin1220_des1223_sd2019-10-01_ed2019-12-31.csv ├── origin1220_des1224_sd2018-01-01_ed2018-03-31.csv ├── origin1220_des1224_sd2018-04-01_ed2018-06-30.csv ├── origin1220_des1224_sd2018-07-01_ed2018-09-30.csv ├── origin1220_des1224_sd2018-10-01_ed2018-12-31.csv ├── origin1220_des1224_sd2019-01-01_ed2019-03-31.csv ├── origin1220_des1224_sd2019-04-01_ed2019-06-30.csv ├── 
origin1220_des1224_sd2019-07-01_ed2019-09-30.csv ├── origin1220_des1224_sd2019-10-01_ed2019-12-31.csv ├── origin1220_des1230_sd2018-01-01_ed2018-03-31.csv ├── origin1220_des1230_sd2018-04-01_ed2018-06-30.csv ├── origin1220_des1230_sd2018-07-01_ed2018-09-30.csv ├── origin1220_des1230_sd2018-10-01_ed2018-12-31.csv ├── origin1220_des1230_sd2019-01-01_ed2019-03-31.csv ├── origin1220_des1230_sd2019-04-01_ed2019-06-30.csv ├── origin1220_des1230_sd2019-07-01_ed2019-09-30.csv ├── origin1220_des1230_sd2019-10-01_ed2019-12-31.csv ├── origin1220_des1390_sd2018-01-01_ed2018-03-31.csv ├── origin1220_des1390_sd2018-04-01_ed2018-06-30.csv ├── origin1220_des1390_sd2018-07-01_ed2018-09-30.csv ├── origin1220_des1390_sd2018-10-01_ed2018-12-31.csv ├── origin1220_des1390_sd2019-01-01_ed2019-03-31.csv ├── origin1220_des1390_sd2019-04-01_ed2019-06-30.csv ├── origin1220_des1390_sd2019-07-01_ed2019-09-30.csv ├── origin1220_des1390_sd2019-10-01_ed2019-12-31.csv ├── origin1221_des1220_sd2018-01-01_ed2018-03-31.csv ├── origin1221_des1220_sd2018-04-01_ed2018-06-30.csv ├── origin1221_des1220_sd2018-07-01_ed2018-09-30.csv ├── origin1221_des1220_sd2018-10-01_ed2018-12-31.csv ├── origin1221_des1220_sd2019-01-01_ed2019-03-31.csv ├── origin1221_des1220_sd2019-04-01_ed2019-06-30.csv ├── origin1221_des1220_sd2019-07-01_ed2019-09-30.csv ├── origin1221_des1220_sd2019-10-01_ed2019-12-31.csv ├── origin1221_des1222_sd2018-01-01_ed2018-03-31.csv ├── origin1221_des1222_sd2018-04-01_ed2018-06-30.csv ├── origin1221_des1222_sd2018-07-01_ed2018-09-30.csv ├── origin1221_des1222_sd2018-10-01_ed2018-12-31.csv ├── origin1221_des1222_sd2019-01-01_ed2019-03-31.csv ├── origin1221_des1222_sd2019-04-01_ed2019-06-30.csv ├── origin1221_des1222_sd2019-07-01_ed2019-09-30.csv ├── origin1221_des1222_sd2019-10-01_ed2019-12-31.csv ├── origin1222_des1220_sd2018-01-01_ed2018-03-31.csv ├── origin1222_des1220_sd2018-04-01_ed2018-06-30.csv ├── origin1222_des1220_sd2018-07-01_ed2018-09-30.csv ├── 
origin1222_des1220_sd2018-10-01_ed2018-12-31.csv ├── origin1222_des1220_sd2019-01-01_ed2019-03-31.csv ├── origin1222_des1220_sd2019-04-01_ed2019-06-30.csv ├── origin1222_des1220_sd2019-07-01_ed2019-09-30.csv ├── origin1222_des1220_sd2019-10-01_ed2019-12-31.csv ├── origin1223_des1224_sd2018-01-01_ed2018-03-31.csv ├── origin1223_des1224_sd2018-04-01_ed2018-06-30.csv ├── origin1223_des1224_sd2018-07-01_ed2018-09-30.csv ├── origin1223_des1224_sd2018-10-01_ed2018-12-31.csv ├── origin1223_des1224_sd2019-01-01_ed2019-03-31.csv ├── origin1223_des1224_sd2019-04-01_ed2019-06-30.csv ├── origin1223_des1224_sd2019-07-01_ed2019-09-30.csv ├── origin1223_des1224_sd2019-10-01_ed2019-12-31.csv ├── origin1223_des1229_sd2018-01-01_ed2018-03-31.csv ├── origin1223_des1229_sd2018-04-01_ed2018-06-30.csv ├── origin1223_des1229_sd2018-07-01_ed2018-09-30.csv ├── origin1223_des1229_sd2018-10-01_ed2018-12-31.csv ├── origin1223_des1229_sd2019-01-01_ed2019-03-31.csv ├── origin1223_des1229_sd2019-04-01_ed2019-06-30.csv ├── origin1223_des1229_sd2019-07-01_ed2019-09-30.csv ├── origin1223_des1229_sd2019-10-01_ed2019-12-31.csv ├── origin1224_des1229_sd2018-01-01_ed2018-03-31.csv ├── origin1224_des1229_sd2018-04-01_ed2018-06-30.csv ├── origin1224_des1229_sd2018-07-01_ed2018-09-30.csv ├── origin1224_des1229_sd2018-10-01_ed2018-12-31.csv ├── origin1224_des1229_sd2019-01-01_ed2019-03-31.csv ├── origin1224_des1229_sd2019-04-01_ed2019-06-30.csv ├── origin1224_des1229_sd2019-07-01_ed2019-09-30.csv ├── origin1224_des1229_sd2019-10-01_ed2019-12-31.csv ├── origin1224_des1390_sd2018-01-01_ed2018-03-31.csv ├── origin1224_des1390_sd2018-04-01_ed2018-06-30.csv ├── origin1224_des1390_sd2018-07-01_ed2018-09-30.csv ├── origin1224_des1390_sd2018-10-01_ed2018-12-31.csv ├── origin1224_des1390_sd2019-01-01_ed2019-03-31.csv ├── origin1224_des1390_sd2019-04-01_ed2019-06-30.csv ├── origin1224_des1390_sd2019-07-01_ed2019-09-30.csv ├── origin1224_des1390_sd2019-10-01_ed2019-12-31.csv ├── 
origin1228_des1232_sd2018-01-01_ed2018-03-31.csv ├── origin1228_des1232_sd2018-04-01_ed2018-06-30.csv ├── origin1228_des1232_sd2018-07-01_ed2018-09-30.csv ├── origin1228_des1232_sd2018-10-01_ed2018-12-31.csv ├── origin1228_des1232_sd2019-01-01_ed2019-03-31.csv ├── origin1228_des1232_sd2019-04-01_ed2019-06-30.csv ├── origin1228_des1232_sd2019-07-01_ed2019-09-30.csv ├── origin1228_des1232_sd2019-10-01_ed2019-12-31.csv ├── origin1228_des1233_sd2018-01-01_ed2018-03-31.csv ├── origin1228_des1233_sd2018-04-01_ed2018-06-30.csv ├── origin1228_des1233_sd2018-07-01_ed2018-09-30.csv ├── origin1228_des1233_sd2018-10-01_ed2018-12-31.csv ├── origin1228_des1233_sd2019-01-01_ed2019-03-31.csv ├── origin1228_des1233_sd2019-04-01_ed2019-06-30.csv ├── origin1228_des1233_sd2019-07-01_ed2019-09-30.csv ├── origin1228_des1233_sd2019-10-01_ed2019-12-31.csv ├── origin1228_des1234_sd2018-01-01_ed2018-03-31.csv ├── origin1228_des1234_sd2018-04-01_ed2018-06-30.csv ├── origin1228_des1234_sd2018-07-01_ed2018-09-30.csv ├── origin1228_des1234_sd2018-10-01_ed2018-12-31.csv ├── origin1228_des1234_sd2019-01-01_ed2019-03-31.csv ├── origin1228_des1234_sd2019-04-01_ed2019-06-30.csv ├── origin1228_des1234_sd2019-07-01_ed2019-09-30.csv ├── origin1228_des1234_sd2019-10-01_ed2019-12-31.csv ├── origin1229_des1228_sd2018-01-01_ed2018-03-31.csv ├── origin1229_des1228_sd2018-04-01_ed2018-06-30.csv ├── origin1229_des1228_sd2018-07-01_ed2018-09-30.csv ├── origin1229_des1228_sd2018-10-01_ed2018-12-31.csv ├── origin1229_des1228_sd2019-01-01_ed2019-03-31.csv ├── origin1229_des1228_sd2019-04-01_ed2019-06-30.csv ├── origin1229_des1228_sd2019-07-01_ed2019-09-30.csv ├── origin1229_des1228_sd2019-10-01_ed2019-12-31.csv ├── origin1230_des1223_sd2018-01-01_ed2018-03-31.csv ├── origin1230_des1223_sd2018-04-01_ed2018-06-30.csv ├── origin1230_des1223_sd2018-07-01_ed2018-09-30.csv ├── origin1230_des1223_sd2018-10-01_ed2018-12-31.csv ├── origin1230_des1223_sd2019-01-01_ed2019-03-31.csv ├── 
origin1230_des1223_sd2019-04-01_ed2019-06-30.csv ├── origin1230_des1223_sd2019-07-01_ed2019-09-30.csv ├── origin1230_des1223_sd2019-10-01_ed2019-12-31.csv ├── origin1230_des1228_sd2018-01-01_ed2018-03-31.csv ├── origin1230_des1228_sd2018-04-01_ed2018-06-30.csv ├── origin1230_des1228_sd2018-07-01_ed2018-09-30.csv ├── origin1230_des1228_sd2018-10-01_ed2018-12-31.csv ├── origin1230_des1228_sd2019-01-01_ed2019-03-31.csv ├── origin1230_des1228_sd2019-04-01_ed2019-06-30.csv ├── origin1230_des1228_sd2019-07-01_ed2019-09-30.csv ├── origin1230_des1228_sd2019-10-01_ed2019-12-31.csv ├── origin1230_des1229_sd2018-01-01_ed2018-03-31.csv ├── origin1230_des1229_sd2018-04-01_ed2018-06-30.csv ├── origin1230_des1229_sd2018-07-01_ed2018-09-30.csv ├── origin1230_des1229_sd2018-10-01_ed2018-12-31.csv ├── origin1230_des1229_sd2019-01-01_ed2019-03-31.csv ├── origin1230_des1229_sd2019-04-01_ed2019-06-30.csv ├── origin1230_des1229_sd2019-07-01_ed2019-09-30.csv ├── origin1230_des1229_sd2019-10-01_ed2019-12-31.csv ├── origin1230_des1232_sd2018-01-01_ed2018-03-31.csv ├── origin1230_des1232_sd2018-04-01_ed2018-06-30.csv ├── origin1230_des1232_sd2018-07-01_ed2018-09-30.csv ├── origin1230_des1232_sd2018-10-01_ed2018-12-31.csv ├── origin1230_des1232_sd2019-01-01_ed2019-03-31.csv ├── origin1230_des1232_sd2019-04-01_ed2019-06-30.csv ├── origin1230_des1232_sd2019-07-01_ed2019-09-30.csv ├── origin1230_des1232_sd2019-10-01_ed2019-12-31.csv ├── origin1230_des1235_sd2018-01-01_ed2018-03-31.csv ├── origin1230_des1235_sd2018-04-01_ed2018-06-30.csv ├── origin1230_des1235_sd2018-07-01_ed2018-09-30.csv ├── origin1230_des1235_sd2018-10-01_ed2018-12-31.csv ├── origin1230_des1235_sd2019-01-01_ed2019-03-31.csv ├── origin1230_des1235_sd2019-04-01_ed2019-06-30.csv ├── origin1230_des1235_sd2019-07-01_ed2019-09-30.csv ├── origin1230_des1235_sd2019-10-01_ed2019-12-31.csv ├── origin1232_des1233_sd2018-01-01_ed2018-03-31.csv ├── origin1232_des1233_sd2018-04-01_ed2018-06-30.csv ├── 
origin1232_des1233_sd2018-07-01_ed2018-09-30.csv ├── origin1232_des1233_sd2018-10-01_ed2018-12-31.csv ├── origin1232_des1233_sd2019-01-01_ed2019-03-31.csv ├── origin1232_des1233_sd2019-04-01_ed2019-06-30.csv ├── origin1232_des1233_sd2019-07-01_ed2019-09-30.csv ├── origin1232_des1233_sd2019-10-01_ed2019-12-31.csv ├── origin1232_des1254_sd2018-01-01_ed2018-03-31.csv ├── origin1232_des1254_sd2018-04-01_ed2018-06-30.csv ├── origin1232_des1254_sd2018-07-01_ed2018-09-30.csv ├── origin1232_des1254_sd2018-10-01_ed2018-12-31.csv ├── origin1232_des1254_sd2019-01-01_ed2019-03-31.csv ├── origin1232_des1254_sd2019-04-01_ed2019-06-30.csv ├── origin1232_des1254_sd2019-07-01_ed2019-09-30.csv ├── origin1232_des1254_sd2019-10-01_ed2019-12-31.csv ├── origin1233_des1254_sd2018-01-01_ed2018-03-31.csv ├── origin1233_des1254_sd2018-04-01_ed2018-06-30.csv ├── origin1233_des1254_sd2018-07-01_ed2018-09-30.csv ├── origin1233_des1254_sd2018-10-01_ed2018-12-31.csv ├── origin1233_des1254_sd2019-01-01_ed2019-03-31.csv ├── origin1233_des1254_sd2019-04-01_ed2019-06-30.csv ├── origin1233_des1254_sd2019-07-01_ed2019-09-30.csv ├── origin1233_des1254_sd2019-10-01_ed2019-12-31.csv ├── origin1233_des1255_sd2018-01-01_ed2018-03-31.csv ├── origin1233_des1255_sd2018-04-01_ed2018-06-30.csv ├── origin1233_des1255_sd2018-07-01_ed2018-09-30.csv ├── origin1233_des1255_sd2018-10-01_ed2018-12-31.csv ├── origin1233_des1255_sd2019-01-01_ed2019-03-31.csv ├── origin1233_des1255_sd2019-04-01_ed2019-06-30.csv ├── origin1233_des1255_sd2019-07-01_ed2019-09-30.csv ├── origin1233_des1255_sd2019-10-01_ed2019-12-31.csv ├── origin1233_des1263_sd2018-01-01_ed2018-03-31.csv ├── origin1233_des1263_sd2018-04-01_ed2018-06-30.csv ├── origin1233_des1263_sd2018-07-01_ed2018-09-30.csv ├── origin1233_des1263_sd2018-10-01_ed2018-12-31.csv ├── origin1233_des1263_sd2019-01-01_ed2019-03-31.csv ├── origin1233_des1263_sd2019-04-01_ed2019-06-30.csv ├── origin1233_des1263_sd2019-07-01_ed2019-09-30.csv ├── 
origin1233_des1263_sd2019-10-01_ed2019-12-31.csv ├── origin1233_des1380_sd2018-01-01_ed2018-03-31.csv ├── origin1233_des1380_sd2018-04-01_ed2018-06-30.csv ├── origin1233_des1380_sd2018-07-01_ed2018-09-30.csv ├── origin1233_des1380_sd2018-10-01_ed2018-12-31.csv ├── origin1233_des1380_sd2019-01-01_ed2019-03-31.csv ├── origin1233_des1380_sd2019-04-01_ed2019-06-30.csv ├── origin1233_des1380_sd2019-07-01_ed2019-09-30.csv ├── origin1233_des1380_sd2019-10-01_ed2019-12-31.csv ├── origin1234_des1233_sd2018-01-01_ed2018-03-31.csv ├── origin1234_des1233_sd2018-04-01_ed2018-06-30.csv ├── origin1234_des1233_sd2018-07-01_ed2018-09-30.csv ├── origin1234_des1233_sd2018-10-01_ed2018-12-31.csv ├── origin1234_des1233_sd2019-01-01_ed2019-03-31.csv ├── origin1234_des1233_sd2019-04-01_ed2019-06-30.csv ├── origin1234_des1233_sd2019-07-01_ed2019-09-30.csv ├── origin1234_des1233_sd2019-10-01_ed2019-12-31.csv ├── origin1234_des1380_sd2018-01-01_ed2018-03-31.csv ├── origin1234_des1380_sd2018-04-01_ed2018-06-30.csv ├── origin1234_des1380_sd2018-07-01_ed2018-09-30.csv ├── origin1234_des1380_sd2018-10-01_ed2018-12-31.csv ├── origin1234_des1380_sd2019-01-01_ed2019-03-31.csv ├── origin1234_des1380_sd2019-04-01_ed2019-06-30.csv ├── origin1234_des1380_sd2019-07-01_ed2019-09-30.csv ├── origin1234_des1380_sd2019-10-01_ed2019-12-31.csv ├── origin1235_des1237_sd2018-01-01_ed2018-03-31.csv ├── origin1235_des1237_sd2018-04-01_ed2018-06-30.csv ├── origin1235_des1237_sd2018-07-01_ed2018-09-30.csv ├── origin1235_des1237_sd2018-10-01_ed2018-12-31.csv ├── origin1235_des1237_sd2019-01-01_ed2019-03-31.csv ├── origin1235_des1237_sd2019-04-01_ed2019-06-30.csv ├── origin1235_des1237_sd2019-07-01_ed2019-09-30.csv ├── origin1235_des1237_sd2019-10-01_ed2019-12-31.csv ├── origin1235_des1254_sd2018-01-01_ed2018-03-31.csv ├── origin1235_des1254_sd2018-04-01_ed2018-06-30.csv ├── origin1235_des1254_sd2018-07-01_ed2018-09-30.csv ├── origin1235_des1254_sd2018-10-01_ed2018-12-31.csv ├── 
origin1235_des1254_sd2019-01-01_ed2019-03-31.csv ├── origin1235_des1254_sd2019-04-01_ed2019-06-30.csv ├── origin1235_des1254_sd2019-07-01_ed2019-09-30.csv ├── origin1235_des1254_sd2019-10-01_ed2019-12-31.csv ├── origin1236_des1238_sd2018-01-01_ed2018-03-31.csv ├── origin1236_des1238_sd2018-04-01_ed2018-06-30.csv ├── origin1236_des1238_sd2018-07-01_ed2018-09-30.csv ├── origin1236_des1238_sd2018-10-01_ed2018-12-31.csv ├── origin1236_des1238_sd2019-01-01_ed2019-03-31.csv ├── origin1236_des1238_sd2019-04-01_ed2019-06-30.csv ├── origin1236_des1238_sd2019-07-01_ed2019-09-30.csv ├── origin1236_des1238_sd2019-10-01_ed2019-12-31.csv ├── origin1236_des1253_sd2018-01-01_ed2018-03-31.csv ├── origin1236_des1253_sd2018-04-01_ed2018-06-30.csv ├── origin1236_des1253_sd2018-07-01_ed2018-09-30.csv ├── origin1236_des1253_sd2018-10-01_ed2018-12-31.csv ├── origin1236_des1253_sd2019-01-01_ed2019-03-31.csv ├── origin1236_des1253_sd2019-04-01_ed2019-06-30.csv ├── origin1236_des1253_sd2019-07-01_ed2019-09-30.csv ├── origin1236_des1253_sd2019-10-01_ed2019-12-31.csv ├── origin1237_des1236_sd2018-01-01_ed2018-03-31.csv ├── origin1237_des1236_sd2018-04-01_ed2018-06-30.csv ├── origin1237_des1236_sd2018-07-01_ed2018-09-30.csv ├── origin1237_des1236_sd2018-10-01_ed2018-12-31.csv ├── origin1237_des1236_sd2019-01-01_ed2019-03-31.csv ├── origin1237_des1236_sd2019-04-01_ed2019-06-30.csv ├── origin1237_des1236_sd2019-07-01_ed2019-09-30.csv ├── origin1237_des1236_sd2019-10-01_ed2019-12-31.csv ├── origin1237_des1239_sd2018-01-01_ed2018-03-31.csv ├── origin1237_des1239_sd2018-04-01_ed2018-06-30.csv ├── origin1237_des1239_sd2018-07-01_ed2018-09-30.csv ├── origin1237_des1239_sd2018-10-01_ed2018-12-31.csv ├── origin1237_des1239_sd2019-01-01_ed2019-03-31.csv ├── origin1237_des1239_sd2019-04-01_ed2019-06-30.csv ├── origin1237_des1239_sd2019-07-01_ed2019-09-30.csv ├── origin1237_des1239_sd2019-10-01_ed2019-12-31.csv ├── origin1237_des1252_sd2018-01-01_ed2018-03-31.csv ├── 
origin1237_des1252_sd2018-04-01_ed2018-06-30.csv ├── origin1237_des1252_sd2018-07-01_ed2018-09-30.csv ├── origin1237_des1252_sd2018-10-01_ed2018-12-31.csv ├── origin1237_des1252_sd2019-01-01_ed2019-03-31.csv ├── origin1237_des1252_sd2019-04-01_ed2019-06-30.csv ├── origin1237_des1252_sd2019-07-01_ed2019-09-30.csv ├── origin1237_des1252_sd2019-10-01_ed2019-12-31.csv ├── origin1238_des1241_sd2018-01-01_ed2018-03-31.csv ├── origin1238_des1241_sd2018-04-01_ed2018-06-30.csv ├── origin1238_des1241_sd2018-07-01_ed2018-09-30.csv ├── origin1238_des1241_sd2018-10-01_ed2018-12-31.csv ├── origin1238_des1241_sd2019-01-01_ed2019-03-31.csv ├── origin1238_des1241_sd2019-04-01_ed2019-06-30.csv ├── origin1238_des1241_sd2019-07-01_ed2019-09-30.csv ├── origin1238_des1241_sd2019-10-01_ed2019-12-31.csv ├── origin1238_des1249_sd2018-01-01_ed2018-03-31.csv ├── origin1238_des1249_sd2018-04-01_ed2018-06-30.csv ├── origin1238_des1249_sd2018-07-01_ed2018-09-30.csv ├── origin1238_des1249_sd2018-10-01_ed2018-12-31.csv ├── origin1238_des1249_sd2019-01-01_ed2019-03-31.csv ├── origin1238_des1249_sd2019-04-01_ed2019-06-30.csv ├── origin1238_des1249_sd2019-07-01_ed2019-09-30.csv ├── origin1238_des1249_sd2019-10-01_ed2019-12-31.csv ├── origin1238_des1250_sd2018-01-01_ed2018-03-31.csv ├── origin1238_des1250_sd2018-04-01_ed2018-06-30.csv ├── origin1238_des1250_sd2018-07-01_ed2018-09-30.csv ├── origin1238_des1250_sd2018-10-01_ed2018-12-31.csv ├── origin1238_des1250_sd2019-01-01_ed2019-03-31.csv ├── origin1238_des1250_sd2019-04-01_ed2019-06-30.csv ├── origin1238_des1250_sd2019-07-01_ed2019-09-30.csv ├── origin1238_des1250_sd2019-10-01_ed2019-12-31.csv ├── origin1239_des1238_sd2018-01-01_ed2018-03-31.csv ├── origin1239_des1238_sd2018-04-01_ed2018-06-30.csv ├── origin1239_des1238_sd2018-07-01_ed2018-09-30.csv ├── origin1239_des1238_sd2018-10-01_ed2018-12-31.csv ├── origin1239_des1238_sd2019-01-01_ed2019-03-31.csv ├── origin1239_des1238_sd2019-04-01_ed2019-06-30.csv ├── 
origin1239_des1238_sd2019-07-01_ed2019-09-30.csv ├── origin1239_des1238_sd2019-10-01_ed2019-12-31.csv ├── origin1239_des1240_sd2018-01-01_ed2018-03-31.csv ├── origin1239_des1240_sd2018-04-01_ed2018-06-30.csv ├── origin1239_des1240_sd2018-07-01_ed2018-09-30.csv ├── origin1239_des1240_sd2018-10-01_ed2018-12-31.csv ├── origin1239_des1240_sd2019-01-01_ed2019-03-31.csv ├── origin1239_des1240_sd2019-04-01_ed2019-06-30.csv ├── origin1239_des1240_sd2019-07-01_ed2019-09-30.csv ├── origin1239_des1240_sd2019-10-01_ed2019-12-31.csv ├── origin1240_des1241_sd2018-01-01_ed2018-03-31.csv ├── origin1240_des1241_sd2018-04-01_ed2018-06-30.csv ├── origin1240_des1241_sd2018-07-01_ed2018-09-30.csv ├── origin1240_des1241_sd2018-10-01_ed2018-12-31.csv ├── origin1240_des1241_sd2019-01-01_ed2019-03-31.csv ├── origin1240_des1241_sd2019-04-01_ed2019-06-30.csv ├── origin1240_des1241_sd2019-07-01_ed2019-09-30.csv ├── origin1240_des1241_sd2019-10-01_ed2019-12-31.csv ├── origin1240_des1243_sd2018-01-01_ed2018-03-31.csv ├── origin1240_des1243_sd2018-04-01_ed2018-06-30.csv ├── origin1240_des1243_sd2018-07-01_ed2018-09-30.csv ├── origin1240_des1243_sd2018-10-01_ed2018-12-31.csv ├── origin1240_des1243_sd2019-01-01_ed2019-03-31.csv ├── origin1240_des1243_sd2019-04-01_ed2019-06-30.csv ├── origin1240_des1243_sd2019-07-01_ed2019-09-30.csv ├── origin1240_des1243_sd2019-10-01_ed2019-12-31.csv ├── origin1241_des1243_sd2018-01-01_ed2018-03-31.csv ├── origin1241_des1243_sd2018-04-01_ed2018-06-30.csv ├── origin1241_des1243_sd2018-07-01_ed2018-09-30.csv ├── origin1241_des1243_sd2018-10-01_ed2018-12-31.csv ├── origin1241_des1243_sd2019-01-01_ed2019-03-31.csv ├── origin1241_des1243_sd2019-04-01_ed2019-06-30.csv ├── origin1241_des1243_sd2019-07-01_ed2019-09-30.csv ├── origin1241_des1243_sd2019-10-01_ed2019-12-31.csv ├── origin1241_des1246_sd2018-01-01_ed2018-03-31.csv ├── origin1241_des1246_sd2018-04-01_ed2018-06-30.csv ├── origin1241_des1246_sd2018-07-01_ed2018-09-30.csv ├── 
origin1241_des1246_sd2018-10-01_ed2018-12-31.csv ├── origin1241_des1246_sd2019-01-01_ed2019-03-31.csv ├── origin1241_des1246_sd2019-04-01_ed2019-06-30.csv ├── origin1241_des1246_sd2019-07-01_ed2019-09-30.csv ├── origin1241_des1246_sd2019-10-01_ed2019-12-31.csv ├── origin1241_des1247_sd2018-01-01_ed2018-03-31.csv ├── origin1241_des1247_sd2018-04-01_ed2018-06-30.csv ├── origin1241_des1247_sd2018-07-01_ed2018-09-30.csv ├── origin1241_des1247_sd2018-10-01_ed2018-12-31.csv ├── origin1241_des1247_sd2019-01-01_ed2019-03-31.csv ├── origin1241_des1247_sd2019-04-01_ed2019-06-30.csv ├── origin1241_des1247_sd2019-07-01_ed2019-09-30.csv ├── origin1241_des1247_sd2019-10-01_ed2019-12-31.csv ├── origin1242_des1244_sd2018-01-01_ed2018-03-31.csv ├── origin1242_des1244_sd2018-04-01_ed2018-06-30.csv ├── origin1242_des1244_sd2018-07-01_ed2018-09-30.csv ├── origin1242_des1244_sd2018-10-01_ed2018-12-31.csv ├── origin1242_des1244_sd2019-01-01_ed2019-03-31.csv ├── origin1242_des1244_sd2019-04-01_ed2019-06-30.csv ├── origin1242_des1244_sd2019-07-01_ed2019-09-30.csv ├── origin1242_des1244_sd2019-10-01_ed2019-12-31.csv ├── origin1243_des1242_sd2018-01-01_ed2018-03-31.csv ├── origin1243_des1242_sd2018-04-01_ed2018-06-30.csv ├── origin1243_des1242_sd2018-07-01_ed2018-09-30.csv ├── origin1243_des1242_sd2018-10-01_ed2018-12-31.csv ├── origin1243_des1242_sd2019-01-01_ed2019-03-31.csv ├── origin1243_des1242_sd2019-04-01_ed2019-06-30.csv ├── origin1243_des1242_sd2019-07-01_ed2019-09-30.csv ├── origin1243_des1242_sd2019-10-01_ed2019-12-31.csv ├── origin1243_des1244_sd2018-01-01_ed2018-03-31.csv ├── origin1243_des1244_sd2018-04-01_ed2018-06-30.csv ├── origin1243_des1244_sd2018-07-01_ed2018-09-30.csv ├── origin1243_des1244_sd2018-10-01_ed2018-12-31.csv ├── origin1243_des1244_sd2019-01-01_ed2019-03-31.csv ├── origin1243_des1244_sd2019-04-01_ed2019-06-30.csv ├── origin1243_des1244_sd2019-07-01_ed2019-09-30.csv ├── origin1243_des1244_sd2019-10-01_ed2019-12-31.csv ├── 
origin1243_des1245_sd2018-01-01_ed2018-03-31.csv ├── origin1243_des1245_sd2018-04-01_ed2018-06-30.csv ├── origin1243_des1245_sd2018-07-01_ed2018-09-30.csv ├── origin1243_des1245_sd2018-10-01_ed2018-12-31.csv ├── origin1243_des1245_sd2019-01-01_ed2019-03-31.csv ├── origin1243_des1245_sd2019-04-01_ed2019-06-30.csv ├── origin1243_des1245_sd2019-07-01_ed2019-09-30.csv ├── origin1243_des1245_sd2019-10-01_ed2019-12-31.csv ├── origin1245_des1244_sd2018-01-01_ed2018-03-31.csv ├── origin1245_des1244_sd2018-04-01_ed2018-06-30.csv ├── origin1245_des1244_sd2018-07-01_ed2018-09-30.csv ├── origin1245_des1244_sd2018-10-01_ed2018-12-31.csv ├── origin1245_des1244_sd2019-01-01_ed2019-03-31.csv ├── origin1245_des1244_sd2019-04-01_ed2019-06-30.csv ├── origin1245_des1244_sd2019-07-01_ed2019-09-30.csv ├── origin1245_des1244_sd2019-10-01_ed2019-12-31.csv ├── origin1245_des1256_sd2018-01-01_ed2018-03-31.csv ├── origin1245_des1256_sd2018-04-01_ed2018-06-30.csv ├── origin1245_des1256_sd2018-07-01_ed2018-09-30.csv ├── origin1245_des1256_sd2018-10-01_ed2018-12-31.csv ├── origin1245_des1256_sd2019-01-01_ed2019-03-31.csv ├── origin1245_des1256_sd2019-04-01_ed2019-06-30.csv ├── origin1245_des1256_sd2019-07-01_ed2019-09-30.csv ├── origin1245_des1256_sd2019-10-01_ed2019-12-31.csv ├── origin1246_des1247_sd2018-01-01_ed2018-03-31.csv ├── origin1246_des1247_sd2018-04-01_ed2018-06-30.csv ├── origin1246_des1247_sd2018-07-01_ed2018-09-30.csv ├── origin1246_des1247_sd2018-10-01_ed2018-12-31.csv ├── origin1246_des1247_sd2019-01-01_ed2019-03-31.csv ├── origin1246_des1247_sd2019-04-01_ed2019-06-30.csv ├── origin1246_des1247_sd2019-07-01_ed2019-09-30.csv ├── origin1246_des1247_sd2019-10-01_ed2019-12-31.csv ├── origin1246_des1256_sd2018-01-01_ed2018-03-31.csv ├── origin1246_des1256_sd2018-04-01_ed2018-06-30.csv ├── origin1246_des1256_sd2018-07-01_ed2018-09-30.csv ├── origin1246_des1256_sd2018-10-01_ed2018-12-31.csv ├── origin1246_des1256_sd2019-01-01_ed2019-03-31.csv ├── 
origin1246_des1256_sd2019-04-01_ed2019-06-30.csv ├── origin1246_des1256_sd2019-07-01_ed2019-09-30.csv ├── origin1246_des1256_sd2019-10-01_ed2019-12-31.csv ├── origin1247_des1245_sd2018-01-01_ed2018-03-31.csv ├── origin1247_des1245_sd2018-04-01_ed2018-06-30.csv ├── origin1247_des1245_sd2018-07-01_ed2018-09-30.csv ├── origin1247_des1245_sd2018-10-01_ed2018-12-31.csv ├── origin1247_des1245_sd2019-01-01_ed2019-03-31.csv ├── origin1247_des1245_sd2019-04-01_ed2019-06-30.csv ├── origin1247_des1245_sd2019-07-01_ed2019-09-30.csv ├── origin1247_des1245_sd2019-10-01_ed2019-12-31.csv ├── origin1247_des1256_sd2018-01-01_ed2018-03-31.csv ├── origin1247_des1256_sd2018-04-01_ed2018-06-30.csv ├── origin1247_des1256_sd2018-07-01_ed2018-09-30.csv ├── origin1247_des1256_sd2018-10-01_ed2018-12-31.csv ├── origin1247_des1256_sd2019-01-01_ed2019-03-31.csv ├── origin1247_des1256_sd2019-04-01_ed2019-06-30.csv ├── origin1247_des1256_sd2019-07-01_ed2019-09-30.csv ├── origin1247_des1256_sd2019-10-01_ed2019-12-31.csv ├── origin1248_des1249_sd2018-01-01_ed2018-03-31.csv ├── origin1248_des1249_sd2018-04-01_ed2018-06-30.csv ├── origin1248_des1249_sd2018-07-01_ed2018-09-30.csv ├── origin1248_des1249_sd2018-10-01_ed2018-12-31.csv ├── origin1248_des1249_sd2019-01-01_ed2019-03-31.csv ├── origin1248_des1249_sd2019-04-01_ed2019-06-30.csv ├── origin1248_des1249_sd2019-07-01_ed2019-09-30.csv ├── origin1248_des1249_sd2019-10-01_ed2019-12-31.csv ├── origin1248_des1258_sd2018-01-01_ed2018-03-31.csv ├── origin1248_des1258_sd2018-04-01_ed2018-06-30.csv ├── origin1248_des1258_sd2018-07-01_ed2018-09-30.csv ├── origin1248_des1258_sd2018-10-01_ed2018-12-31.csv ├── origin1248_des1258_sd2019-01-01_ed2019-03-31.csv ├── origin1248_des1258_sd2019-04-01_ed2019-06-30.csv ├── origin1248_des1258_sd2019-07-01_ed2019-09-30.csv ├── origin1248_des1258_sd2019-10-01_ed2019-12-31.csv ├── origin1249_des1246_sd2018-01-01_ed2018-03-31.csv ├── origin1249_des1246_sd2018-04-01_ed2018-06-30.csv ├── 
origin1249_des1246_sd2018-07-01_ed2018-09-30.csv ├── origin1249_des1246_sd2018-10-01_ed2018-12-31.csv ├── origin1249_des1246_sd2019-01-01_ed2019-03-31.csv ├── origin1249_des1246_sd2019-04-01_ed2019-06-30.csv ├── origin1249_des1246_sd2019-07-01_ed2019-09-30.csv ├── origin1249_des1246_sd2019-10-01_ed2019-12-31.csv ├── origin1249_des1257_sd2018-01-01_ed2018-03-31.csv ├── origin1249_des1257_sd2018-04-01_ed2018-06-30.csv ├── origin1249_des1257_sd2018-07-01_ed2018-09-30.csv ├── origin1249_des1257_sd2018-10-01_ed2018-12-31.csv ├── origin1249_des1257_sd2019-01-01_ed2019-03-31.csv ├── origin1249_des1257_sd2019-04-01_ed2019-06-30.csv ├── origin1249_des1257_sd2019-07-01_ed2019-09-30.csv ├── origin1249_des1257_sd2019-10-01_ed2019-12-31.csv ├── origin1250_des1248_sd2018-01-01_ed2018-03-31.csv ├── origin1250_des1248_sd2018-04-01_ed2018-06-30.csv ├── origin1250_des1248_sd2018-07-01_ed2018-09-30.csv ├── origin1250_des1248_sd2018-10-01_ed2018-12-31.csv ├── origin1250_des1248_sd2019-01-01_ed2019-03-31.csv ├── origin1250_des1248_sd2019-04-01_ed2019-06-30.csv ├── origin1250_des1248_sd2019-07-01_ed2019-09-30.csv ├── origin1250_des1248_sd2019-10-01_ed2019-12-31.csv ├── origin1250_des1249_sd2018-01-01_ed2018-03-31.csv ├── origin1250_des1249_sd2018-04-01_ed2018-06-30.csv ├── origin1250_des1249_sd2018-07-01_ed2018-09-30.csv ├── origin1250_des1249_sd2018-10-01_ed2018-12-31.csv ├── origin1250_des1249_sd2019-01-01_ed2019-03-31.csv ├── origin1250_des1249_sd2019-04-01_ed2019-06-30.csv ├── origin1250_des1249_sd2019-07-01_ed2019-09-30.csv ├── origin1250_des1249_sd2019-10-01_ed2019-12-31.csv ├── origin1251_des1248_sd2018-01-01_ed2018-03-31.csv ├── origin1251_des1248_sd2018-04-01_ed2018-06-30.csv ├── origin1251_des1248_sd2018-07-01_ed2018-09-30.csv ├── origin1251_des1248_sd2018-10-01_ed2018-12-31.csv ├── origin1251_des1248_sd2019-01-01_ed2019-03-31.csv ├── origin1251_des1248_sd2019-04-01_ed2019-06-30.csv ├── origin1251_des1248_sd2019-07-01_ed2019-09-30.csv ├── 
origin1251_des1248_sd2019-10-01_ed2019-12-31.csv ├── origin1251_des1250_sd2018-01-01_ed2018-03-31.csv ├── origin1251_des1250_sd2018-04-01_ed2018-06-30.csv ├── origin1251_des1250_sd2018-07-01_ed2018-09-30.csv ├── origin1251_des1250_sd2018-10-01_ed2018-12-31.csv ├── origin1251_des1250_sd2019-01-01_ed2019-03-31.csv ├── origin1251_des1250_sd2019-04-01_ed2019-06-30.csv ├── origin1251_des1250_sd2019-07-01_ed2019-09-30.csv ├── origin1251_des1250_sd2019-10-01_ed2019-12-31.csv ├── origin1251_des1255_sd2018-01-01_ed2018-03-31.csv ├── origin1251_des1255_sd2018-04-01_ed2018-06-30.csv ├── origin1251_des1255_sd2018-07-01_ed2018-09-30.csv ├── origin1251_des1255_sd2018-10-01_ed2018-12-31.csv ├── origin1251_des1255_sd2019-01-01_ed2019-03-31.csv ├── origin1251_des1255_sd2019-04-01_ed2019-06-30.csv ├── origin1251_des1255_sd2019-07-01_ed2019-09-30.csv ├── origin1251_des1255_sd2019-10-01_ed2019-12-31.csv ├── origin1252_des1251_sd2018-01-01_ed2018-03-31.csv ├── origin1252_des1251_sd2018-04-01_ed2018-06-30.csv ├── origin1252_des1251_sd2018-07-01_ed2018-09-30.csv ├── origin1252_des1251_sd2018-10-01_ed2018-12-31.csv ├── origin1252_des1251_sd2019-01-01_ed2019-03-31.csv ├── origin1252_des1251_sd2019-04-01_ed2019-06-30.csv ├── origin1252_des1251_sd2019-07-01_ed2019-09-30.csv ├── origin1252_des1251_sd2019-10-01_ed2019-12-31.csv ├── origin1252_des1253_sd2018-01-01_ed2018-03-31.csv ├── origin1252_des1253_sd2018-04-01_ed2018-06-30.csv ├── origin1252_des1253_sd2018-07-01_ed2018-09-30.csv ├── origin1252_des1253_sd2018-10-01_ed2018-12-31.csv ├── origin1252_des1253_sd2019-01-01_ed2019-03-31.csv ├── origin1252_des1253_sd2019-04-01_ed2019-06-30.csv ├── origin1252_des1253_sd2019-07-01_ed2019-09-30.csv ├── origin1252_des1253_sd2019-10-01_ed2019-12-31.csv ├── origin1253_des1250_sd2018-01-01_ed2018-03-31.csv ├── origin1253_des1250_sd2018-04-01_ed2018-06-30.csv ├── origin1253_des1250_sd2018-07-01_ed2018-09-30.csv ├── origin1253_des1250_sd2018-10-01_ed2018-12-31.csv ├── 
origin1253_des1250_sd2019-01-01_ed2019-03-31.csv ├── origin1253_des1250_sd2019-04-01_ed2019-06-30.csv ├── origin1253_des1250_sd2019-07-01_ed2019-09-30.csv ├── origin1253_des1250_sd2019-10-01_ed2019-12-31.csv ├── origin1253_des1251_sd2018-01-01_ed2018-03-31.csv ├── origin1253_des1251_sd2018-04-01_ed2018-06-30.csv ├── origin1253_des1251_sd2018-07-01_ed2018-09-30.csv ├── origin1253_des1251_sd2018-10-01_ed2018-12-31.csv ├── origin1253_des1251_sd2019-01-01_ed2019-03-31.csv ├── origin1253_des1251_sd2019-04-01_ed2019-06-30.csv ├── origin1253_des1251_sd2019-07-01_ed2019-09-30.csv ├── origin1253_des1251_sd2019-10-01_ed2019-12-31.csv ├── origin1254_des1251_sd2018-01-01_ed2018-03-31.csv ├── origin1254_des1251_sd2018-04-01_ed2018-06-30.csv ├── origin1254_des1251_sd2018-07-01_ed2018-09-30.csv ├── origin1254_des1251_sd2018-10-01_ed2018-12-31.csv ├── origin1254_des1251_sd2019-01-01_ed2019-03-31.csv ├── origin1254_des1251_sd2019-04-01_ed2019-06-30.csv ├── origin1254_des1251_sd2019-07-01_ed2019-09-30.csv ├── origin1254_des1251_sd2019-10-01_ed2019-12-31.csv ├── origin1254_des1252_sd2018-01-01_ed2018-03-31.csv ├── origin1254_des1252_sd2018-04-01_ed2018-06-30.csv ├── origin1254_des1252_sd2018-07-01_ed2018-09-30.csv ├── origin1254_des1252_sd2018-10-01_ed2018-12-31.csv ├── origin1254_des1252_sd2019-01-01_ed2019-03-31.csv ├── origin1254_des1252_sd2019-04-01_ed2019-06-30.csv ├── origin1254_des1252_sd2019-07-01_ed2019-09-30.csv ├── origin1254_des1252_sd2019-10-01_ed2019-12-31.csv ├── origin1254_des1255_sd2018-01-01_ed2018-03-31.csv ├── origin1254_des1255_sd2018-04-01_ed2018-06-30.csv ├── origin1254_des1255_sd2018-07-01_ed2018-09-30.csv ├── origin1254_des1255_sd2018-10-01_ed2018-12-31.csv ├── origin1254_des1255_sd2019-01-01_ed2019-03-31.csv ├── origin1254_des1255_sd2019-04-01_ed2019-06-30.csv ├── origin1254_des1255_sd2019-07-01_ed2019-09-30.csv ├── origin1254_des1255_sd2019-10-01_ed2019-12-31.csv ├── origin1255_des1258_sd2018-01-01_ed2018-03-31.csv ├── 
origin1255_des1258_sd2018-04-01_ed2018-06-30.csv ├── origin1255_des1258_sd2018-07-01_ed2018-09-30.csv ├── origin1255_des1258_sd2018-10-01_ed2018-12-31.csv ├── origin1255_des1258_sd2019-01-01_ed2019-03-31.csv ├── origin1255_des1258_sd2019-04-01_ed2019-06-30.csv ├── origin1255_des1258_sd2019-07-01_ed2019-09-30.csv ├── origin1255_des1258_sd2019-10-01_ed2019-12-31.csv ├── origin1255_des1263_sd2018-01-01_ed2018-03-31.csv ├── origin1255_des1263_sd2018-04-01_ed2018-06-30.csv ├── origin1255_des1263_sd2018-07-01_ed2018-09-30.csv ├── origin1255_des1263_sd2018-10-01_ed2018-12-31.csv ├── origin1255_des1263_sd2019-01-01_ed2019-03-31.csv ├── origin1255_des1263_sd2019-04-01_ed2019-06-30.csv ├── origin1255_des1263_sd2019-07-01_ed2019-09-30.csv ├── origin1255_des1263_sd2019-10-01_ed2019-12-31.csv ├── origin1256_des1259_sd2018-01-01_ed2018-03-31.csv ├── origin1256_des1259_sd2018-04-01_ed2018-06-30.csv ├── origin1256_des1259_sd2018-07-01_ed2018-09-30.csv ├── origin1256_des1259_sd2018-10-01_ed2018-12-31.csv ├── origin1256_des1259_sd2019-01-01_ed2019-03-31.csv ├── origin1256_des1259_sd2019-04-01_ed2019-06-30.csv ├── origin1256_des1259_sd2019-07-01_ed2019-09-30.csv ├── origin1256_des1259_sd2019-10-01_ed2019-12-31.csv ├── origin1257_des1256_sd2018-01-01_ed2018-03-31.csv ├── origin1257_des1256_sd2018-04-01_ed2018-06-30.csv ├── origin1257_des1256_sd2018-07-01_ed2018-09-30.csv ├── origin1257_des1256_sd2018-10-01_ed2018-12-31.csv ├── origin1257_des1256_sd2019-01-01_ed2019-03-31.csv ├── origin1257_des1256_sd2019-04-01_ed2019-06-30.csv ├── origin1257_des1256_sd2019-07-01_ed2019-09-30.csv ├── origin1257_des1256_sd2019-10-01_ed2019-12-31.csv ├── origin1257_des1260_sd2018-01-01_ed2018-03-31.csv ├── origin1257_des1260_sd2018-04-01_ed2018-06-30.csv ├── origin1257_des1260_sd2018-07-01_ed2018-09-30.csv ├── origin1257_des1260_sd2018-10-01_ed2018-12-31.csv ├── origin1257_des1260_sd2019-01-01_ed2019-03-31.csv ├── origin1257_des1260_sd2019-04-01_ed2019-06-30.csv ├── 
origin1257_des1260_sd2019-07-01_ed2019-09-30.csv ├── origin1257_des1260_sd2019-10-01_ed2019-12-31.csv ├── origin1258_des1257_sd2018-01-01_ed2018-03-31.csv ├── origin1258_des1257_sd2018-04-01_ed2018-06-30.csv ├── origin1258_des1257_sd2018-07-01_ed2018-09-30.csv ├── origin1258_des1257_sd2018-10-01_ed2018-12-31.csv ├── origin1258_des1257_sd2019-01-01_ed2019-03-31.csv ├── origin1258_des1257_sd2019-04-01_ed2019-06-30.csv ├── origin1258_des1257_sd2019-07-01_ed2019-09-30.csv ├── origin1258_des1257_sd2019-10-01_ed2019-12-31.csv ├── origin1258_des1260_sd2018-01-01_ed2018-03-31.csv ├── origin1258_des1260_sd2018-04-01_ed2018-06-30.csv ├── origin1258_des1260_sd2018-07-01_ed2018-09-30.csv ├── origin1258_des1260_sd2018-10-01_ed2018-12-31.csv ├── origin1258_des1260_sd2019-01-01_ed2019-03-31.csv ├── origin1258_des1260_sd2019-04-01_ed2019-06-30.csv ├── origin1258_des1260_sd2019-07-01_ed2019-09-30.csv ├── origin1258_des1260_sd2019-10-01_ed2019-12-31.csv ├── origin1259_des1261_sd2018-01-01_ed2018-03-31.csv ├── origin1259_des1261_sd2018-04-01_ed2018-06-30.csv ├── origin1259_des1261_sd2018-07-01_ed2018-09-30.csv ├── origin1259_des1261_sd2018-10-01_ed2018-12-31.csv ├── origin1259_des1261_sd2019-01-01_ed2019-03-31.csv ├── origin1259_des1261_sd2019-04-01_ed2019-06-30.csv ├── origin1259_des1261_sd2019-07-01_ed2019-09-30.csv ├── origin1259_des1261_sd2019-10-01_ed2019-12-31.csv ├── origin1260_des1259_sd2018-01-01_ed2018-03-31.csv ├── origin1260_des1259_sd2018-04-01_ed2018-06-30.csv ├── origin1260_des1259_sd2018-07-01_ed2018-09-30.csv ├── origin1260_des1259_sd2018-10-01_ed2018-12-31.csv ├── origin1260_des1259_sd2019-01-01_ed2019-03-31.csv ├── origin1260_des1259_sd2019-04-01_ed2019-06-30.csv ├── origin1260_des1259_sd2019-07-01_ed2019-09-30.csv ├── origin1260_des1259_sd2019-10-01_ed2019-12-31.csv ├── origin1260_des1262_sd2018-01-01_ed2018-03-31.csv ├── origin1260_des1262_sd2018-04-01_ed2018-06-30.csv ├── origin1260_des1262_sd2018-07-01_ed2018-09-30.csv ├── 
origin1260_des1262_sd2018-10-01_ed2018-12-31.csv ├── origin1260_des1262_sd2019-01-01_ed2019-03-31.csv ├── origin1260_des1262_sd2019-04-01_ed2019-06-30.csv ├── origin1260_des1262_sd2019-07-01_ed2019-09-30.csv ├── origin1260_des1262_sd2019-10-01_ed2019-12-31.csv ├── origin1261_des1383_sd2018-01-01_ed2018-03-31.csv ├── origin1261_des1383_sd2018-04-01_ed2018-06-30.csv ├── origin1261_des1383_sd2018-07-01_ed2018-09-30.csv ├── origin1261_des1383_sd2018-10-01_ed2018-12-31.csv ├── origin1261_des1383_sd2019-01-01_ed2019-03-31.csv ├── origin1261_des1383_sd2019-04-01_ed2019-06-30.csv ├── origin1261_des1383_sd2019-07-01_ed2019-09-30.csv ├── origin1261_des1383_sd2019-10-01_ed2019-12-31.csv ├── origin1262_des1261_sd2018-01-01_ed2018-03-31.csv ├── origin1262_des1261_sd2018-04-01_ed2018-06-30.csv ├── origin1262_des1261_sd2018-07-01_ed2018-09-30.csv ├── origin1262_des1261_sd2018-10-01_ed2018-12-31.csv ├── origin1262_des1261_sd2019-01-01_ed2019-03-31.csv ├── origin1262_des1261_sd2019-04-01_ed2019-06-30.csv ├── origin1262_des1261_sd2019-07-01_ed2019-09-30.csv ├── origin1262_des1261_sd2019-10-01_ed2019-12-31.csv ├── origin1262_des1384_sd2018-01-01_ed2018-03-31.csv ├── origin1262_des1384_sd2018-04-01_ed2018-06-30.csv ├── origin1262_des1384_sd2018-07-01_ed2018-09-30.csv ├── origin1262_des1384_sd2018-10-01_ed2018-12-31.csv ├── origin1262_des1384_sd2019-01-01_ed2019-03-31.csv ├── origin1262_des1384_sd2019-04-01_ed2019-06-30.csv ├── origin1262_des1384_sd2019-07-01_ed2019-09-30.csv ├── origin1262_des1384_sd2019-10-01_ed2019-12-31.csv ├── origin1263_des1260_sd2018-01-01_ed2018-03-31.csv ├── origin1263_des1260_sd2018-04-01_ed2018-06-30.csv ├── origin1263_des1260_sd2018-07-01_ed2018-09-30.csv ├── origin1263_des1260_sd2018-10-01_ed2018-12-31.csv ├── origin1263_des1260_sd2019-01-01_ed2019-03-31.csv ├── origin1263_des1260_sd2019-04-01_ed2019-06-30.csv ├── origin1263_des1260_sd2019-07-01_ed2019-09-30.csv ├── origin1263_des1260_sd2019-10-01_ed2019-12-31.csv ├── 
origin1263_des1262_sd2018-01-01_ed2018-03-31.csv ├── origin1263_des1262_sd2018-04-01_ed2018-06-30.csv ├── origin1263_des1262_sd2018-07-01_ed2018-09-30.csv ├── origin1263_des1262_sd2018-10-01_ed2018-12-31.csv ├── origin1263_des1262_sd2019-01-01_ed2019-03-31.csv ├── origin1263_des1262_sd2019-04-01_ed2019-06-30.csv ├── origin1263_des1262_sd2019-07-01_ed2019-09-30.csv ├── origin1263_des1262_sd2019-10-01_ed2019-12-31.csv ├── origin1263_des1382_sd2018-01-01_ed2018-03-31.csv ├── origin1263_des1382_sd2018-04-01_ed2018-06-30.csv ├── origin1263_des1382_sd2018-07-01_ed2018-09-30.csv ├── origin1263_des1382_sd2018-10-01_ed2018-12-31.csv ├── origin1263_des1382_sd2019-01-01_ed2019-03-31.csv ├── origin1263_des1382_sd2019-04-01_ed2019-06-30.csv ├── origin1263_des1382_sd2019-07-01_ed2019-09-30.csv ├── origin1263_des1382_sd2019-10-01_ed2019-12-31.csv ├── origin1380_des1382_sd2018-01-01_ed2018-03-31.csv ├── origin1380_des1382_sd2018-04-01_ed2018-06-30.csv ├── origin1380_des1382_sd2018-07-01_ed2018-09-30.csv ├── origin1380_des1382_sd2018-10-01_ed2018-12-31.csv ├── origin1380_des1382_sd2019-01-01_ed2019-03-31.csv ├── origin1380_des1382_sd2019-04-01_ed2019-06-30.csv ├── origin1380_des1382_sd2019-07-01_ed2019-09-30.csv ├── origin1380_des1382_sd2019-10-01_ed2019-12-31.csv ├── origin1382_des1384_sd2018-01-01_ed2018-03-31.csv ├── origin1382_des1384_sd2018-04-01_ed2018-06-30.csv ├── origin1382_des1384_sd2018-07-01_ed2018-09-30.csv ├── origin1382_des1384_sd2018-10-01_ed2018-12-31.csv ├── origin1382_des1384_sd2019-01-01_ed2019-03-31.csv ├── origin1382_des1384_sd2019-04-01_ed2019-06-30.csv ├── origin1382_des1384_sd2019-07-01_ed2019-09-30.csv ├── origin1382_des1384_sd2019-10-01_ed2019-12-31.csv ├── origin1384_des1383_sd2018-01-01_ed2018-03-31.csv ├── origin1384_des1383_sd2018-04-01_ed2018-06-30.csv ├── origin1384_des1383_sd2018-07-01_ed2018-09-30.csv ├── origin1384_des1383_sd2018-10-01_ed2018-12-31.csv ├── origin1384_des1383_sd2019-01-01_ed2019-03-31.csv ├── 
origin1384_des1383_sd2019-04-01_ed2019-06-30.csv ├── origin1384_des1383_sd2019-07-01_ed2019-09-30.csv ├── origin1384_des1383_sd2019-10-01_ed2019-12-31.csv ├── origin1390_des1228_sd2018-01-01_ed2018-03-31.csv ├── origin1390_des1228_sd2018-04-01_ed2018-06-30.csv ├── origin1390_des1228_sd2018-07-01_ed2018-09-30.csv ├── origin1390_des1228_sd2018-10-01_ed2018-12-31.csv ├── origin1390_des1228_sd2019-01-01_ed2019-03-31.csv ├── origin1390_des1228_sd2019-04-01_ed2019-06-30.csv ├── origin1390_des1228_sd2019-07-01_ed2019-09-30.csv ├── origin1390_des1228_sd2019-10-01_ed2019-12-31.csv ├── origin1390_des1234_sd2018-01-01_ed2018-03-31.csv ├── origin1390_des1234_sd2018-04-01_ed2018-06-30.csv ├── origin1390_des1234_sd2018-07-01_ed2018-09-30.csv ├── origin1390_des1234_sd2018-10-01_ed2018-12-31.csv ├── origin1390_des1234_sd2019-01-01_ed2019-03-31.csv ├── origin1390_des1234_sd2019-04-01_ed2019-06-30.csv ├── origin1390_des1234_sd2019-07-01_ed2019-09-30.csv ├── origin1390_des1234_sd2019-10-01_ed2019-12-31.csv ├── origin1390_des1380_sd2018-01-01_ed2018-03-31.csv ├── origin1390_des1380_sd2018-04-01_ed2018-06-30.csv ├── origin1390_des1380_sd2018-07-01_ed2018-09-30.csv ├── origin1390_des1380_sd2018-10-01_ed2018-12-31.csv ├── origin1390_des1380_sd2019-01-01_ed2019-03-31.csv ├── origin1390_des1380_sd2019-04-01_ed2019-06-30.csv ├── origin1390_des1380_sd2019-07-01_ed2019-09-30.csv └── origin1390_des1380_sd2019-10-01_ed2019-12-31.csv ├── data_downloading.R ├── downtown_risks_forest_years_halfyear.csv ├── downtown_risks_forest_years_halfyear.pkl ├── downtown_risks_forest_years_onehalfyear.csv ├── downtown_risks_forest_years_onehalfyear.pkl ├── downtown_risks_forest_years_oneyear.csv ├── downtown_risks_forest_years_oneyear.pkl ├── downtown_risks_forest_years_twoyear.csv ├── downtown_risks_forest_years_twoyear.pkl ├── experiment_downtown_years.py ├── preprocessing.R ├── tree.py └── weather_measures.csv /.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/.DS_Store -------------------------------------------------------------------------------- /cvar/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/.DS_Store -------------------------------------------------------------------------------- /cvar/__pycache__/tree.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/__pycache__/tree.cpython-36.pyc -------------------------------------------------------------------------------- /cvar/__pycache__/tree.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/__pycache__/tree.cpython-38.pyc -------------------------------------------------------------------------------- /cvar/experiment_cvar_lognormal.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from cvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | 7 | p = 10 8 | runs = 50 9 | n_jobs = 50 10 | n_trees = 500; 11 | Nx_test = 200 12 | Ny_train = 1000 13 | Ny_test = 2000 14 | R = 0.1 15 | alpha = 0.2 16 | obj_coef_list = [0] 17 | N_list = [100, 200, 400, 800] 18 | lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False 19 | seed = 0 20 | 21 | generate_Y = generate_Y_lognormal 22 | 23 | honesty = False; 24 | verbose = False; oracle = False; 25 | bootstrap = True; 26 | 27 | cond_mean = [lambda x: np.exp(x[:, 0])/5, lambda x: x[:, 0]/5, lambda x: np.abs(x[:, 0])/5] 28 | cond_std = [lambda x: 1 - 0.5*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 1 - 
0.5*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 1 - 0.5*((1<=x[:, 1])&(x[:, 1]<=3))] 29 | 30 | risk_all = {} 31 | feature_split_all = {} 32 | results_eval_all = {} 33 | feature_importance_all = {} 34 | models_all = {} 35 | 36 | direct = '' 37 | date = '' 38 | output = direct + date + "cvar_lognormal.txt" 39 | 40 | with open(output, 'w') as f: 41 | print("start", file = f) 42 | 43 | for N in N_list: 44 | risk_all[str(N)] = {} 45 | feature_split_all[str(N)] = {} 46 | results_eval_all[str(N)] = {} 47 | feature_importance_all[str(N)] = {} 48 | models_all[str(N)] = {} 49 | 50 | for obj_coef in obj_coef_list: 51 | 52 | n_proposals = N; 53 | mtry = p; 54 | subsample_ratio = 1; 55 | max_depth=100; 56 | min_leaf_size=10; 57 | balancedness_tol = 0.2; 58 | 59 | np.random.seed(seed) 60 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 61 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std, seed = seed) for run in range(runs)] 62 | 63 | with open(output, 'a') as f: 64 | print("N: ", N, file = f) 65 | print("obj_coef: ", obj_coef, file = f) 66 | 67 | time1 = time.time() 68 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_models_full)(X_list[run], Y_list[run], X_list[run], Y_list[run], 69 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 70 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = min_leaf_size, 71 | verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap, seed = seed) for run in range(runs)) 72 | time2 = time.time() 73 | with open(output, 'a') as f: 74 | print("time: ", time2 - time1, file = f) 75 | print("------------------------", file = f) 76 | models_all[str(N)][str(obj_coef)] = results_fit 77 | 78 | time1 = time.time() 79 | results_eval_all[str(N)][str(obj_coef)] = Parallel(n_jobs=n_jobs, verbose = 
3)(delayed(evaluate_one_run_determ_constr_full)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 80 | Nx_test,Ny_train, Ny_test, cond_mean, cond_std, 81 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 82 | verbose = False, generate_Y = generate_Y, seed = seed) for run in range(runs)) 83 | time2 = time.time() 84 | with open(output, 'a') as f: 85 | print("time: ", time2 - time1, file = f) 86 | print("------------------------", file = f) 87 | 88 | risks = extract_risk(results_eval_all[str(N)][str(obj_coef)]) 89 | with open(output, 'a') as f: 90 | print("risk:", N, file=f) 91 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 92 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 93 | print("------------------------", file = f) 94 | risk_all[str(N)][str(obj_coef)] = risks 95 | 96 | = 97 | pickle.dump(results_eval_all, open(direct + date + "results_eval_cvar_lognormal.pkl", "wb")) 98 | pickle.dump(risk_all, open(direct + date + "risk_cvar_lognormal.pkl", "wb")) 99 | 100 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 101 | feature_split_all[str(N)][str(obj_coef)] = feature_split_freq 102 | 103 | feature_importance = evaluate_feature_importance(results_fit, p) 104 | feature_importance_all[str(N)][str(obj_coef)] = feature_importance 105 | 106 | pickle.dump(feature_split_all, open(direct + date + "feature_split_cvar_lognormal.pkl", "wb")) 107 | pickle.dump(feature_importance_all, open(direct + date + "feature_imp_cvar_lognormal.pkl", "wb")) 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /cvar/experiment_cvar_lognormal_honesty.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from cvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | # forest 7 | 8 | p = 10 9 | n_jobs = 50 10 | runs 
= 50 11 | n_trees = 500; 12 | Nx_test = 200 13 | Ny_train = 1000 14 | Ny_test = 2000 15 | R = 0.1 16 | alpha = 0.1 17 | obj_coef_list = [0] 18 | N_list = [100, 200, 400, 800] 19 | lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False 20 | 21 | 22 | generate_Y = generate_Y_lognormal 23 | 24 | honesty = False; 25 | verbose = False; oracle = False; 26 | bootstrap = False; 27 | 28 | cond_mean = [lambda x: np.exp(x[:, 0])/5, lambda x: x[:, 0]/5, lambda x: np.abs(x[:, 0])/5] 29 | cond_std = [lambda x: 1 - 0.5*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 1 - 0.5*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 1 - 0.5*((1<=x[:, 1])&(x[:, 1]<=3))] 30 | 31 | risk_all = {} 32 | feature_split_all = {} 33 | results_eval_all = {} 34 | 35 | direct = '' 36 | date = '' 37 | output = "cvar_lognormal_honesty.txt" 38 | 39 | with open(output, 'w') as f: 40 | print("start", file = f) 41 | 42 | for N in N_list: 43 | risk_all[str(N)] = {} 44 | feature_split_all[str(N)] = {} 45 | results_eval_all[str(N)] = {} 46 | 47 | for obj_coef in obj_coef_list: 48 | 49 | n_proposals = N; 50 | mtry = p; 51 | subsample_ratio = 0.63; 52 | max_depth=100; 53 | min_leaf_size=10; 54 | balancedness_tol = 0.2; 55 | 56 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 57 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)] 58 | 59 | with open(output, 'a') as f: 60 | print("N: ", N, file = f) 61 | print("obj_coef: ", obj_coef, file = f) 62 | 63 | time1 = time.time() 64 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest_honesty)(X_list[run], Y_list[run], X_list[run], Y_list[run], 65 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 66 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = min_leaf_size, 67 | verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, 
bootstrap = bootstrap) for run in range(runs)) 68 | time2 = time.time() 69 | with open(output, 'a') as f: 70 | print("time: ", time2 - time1, file = f) 71 | print("------------------------", file = f) 72 | 73 | time1 = time.time() 74 | results_eval_all[str(N)][str(obj_coef)] = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run_determ_constr)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 75 | Nx_test,Ny_train, Ny_test, cond_mean, cond_std, 76 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 77 | verbose = False, generate_Y = generate_Y) for run in range(runs)) 78 | time2 = time.time() 79 | with open(output, 'a') as f: 80 | print("time: ", time2 - time1, file = f) 81 | print("------------------------", file = f) 82 | 83 | risks = extract_risk(results_eval_all[str(N)][str(obj_coef)]) 84 | with open(output, 'a') as f: 85 | print("risk:", N, file=f) 86 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 87 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 88 | print("------------------------", file = f) 89 | risk_all[str(N)][str(obj_coef)] = risks 90 | 91 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 92 | with open(output, 'a') as f: 93 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 94 | print(k,"frac feat. 
slt.:",v.mean(0), file = f) 95 | print("---", file = f) 96 | print("----------------------", file = f) 97 | print("----------------------", file = f) 98 | feature_split_all[str(N)][str(obj_coef)] = feature_split_freq 99 | 100 | pickle.dump(results_eval_all, open(direct + date + "results_eval_cvar_lognormal_honesty.pkl", "wb")) 101 | pickle.dump(risk_all, open(direct + date + "risk_cvar_lognormal_honesty.pkl", "wb")) 102 | pickle.dump(feature_split_all, open(direct + date + "feature_split_cvar_lognormal_honesty.pkl", "wb")) 103 | 104 | 105 | -------------------------------------------------------------------------------- /cvar/experiment_cvar_lognormal_objcoef.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from cvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | 7 | p = 10 8 | n_jobs = 50 9 | runs = 50 10 | n_trees = 500; 11 | Nx_test = 200 12 | Ny_train = 1000 13 | Ny_test = 2000 14 | R = 0.1 15 | alpha = 0.1 16 | obj_coef_list = [0, 1, 2] 17 | N_list = [400] 18 | lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False 19 | 20 | 21 | generate_Y = generate_Y_lognormal 22 | 23 | honesty = False; 24 | verbose = False; oracle = False; 25 | bootstrap = True; 26 | 27 | cond_mean = [lambda x: np.exp(x[:, 0])/5, lambda x: x[:, 0]/5, lambda x: np.abs(x[:, 0])/5] 28 | cond_std = [lambda x: 1 - 0.5*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 1 - 0.5*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 1 - 0.5*((1<=x[:, 1])&(x[:, 1]<=3))] 29 | 30 | risk_all = {} 31 | feature_split_all = {} 32 | results_eval_all = {} 33 | 34 | direct = '' 35 | date = '' 36 | output = "cvar_lognormal_objcoef.txt" 37 | 38 | with open(output, 'w') as f: 39 | print("start", file = f) 40 | 41 | for N in N_list: 42 | risk_all[str(N)] = {} 43 | feature_split_all[str(N)] = {} 44 | results_eval_all[str(N)] = {} 45 | 46 | for obj_coef in obj_coef_list: 47 | 48 | n_proposals = N; 49 | mtry = p; 50 | subsample_ratio = 1; 51 | 
max_depth=100; 52 | min_leaf_size=10; 53 | balancedness_tol = 0.2; 54 | 55 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 56 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)] 57 | 58 | with open(output, 'a') as f: 59 | print("N: ", N, file = f) 60 | print("obj_coef: ", obj_coef, file = f) 61 | 62 | time1 = time.time() 63 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest)(X_list[run], Y_list[run], X_list[run], Y_list[run], 64 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 65 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = min_leaf_size, 66 | verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap) for run in range(runs)) 67 | time2 = time.time() 68 | with open(output, 'a') as f: 69 | print("time: ", time2 - time1, file = f) 70 | print("------------------------", file = f) 71 | 72 | time1 = time.time() 73 | results_eval_all[str(N)][str(obj_coef)] = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run_determ_constr)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 74 | Nx_test,Ny_train, Ny_test, cond_mean, cond_std, 75 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 76 | verbose = False, generate_Y = generate_Y) for run in range(runs)) 77 | time2 = time.time() 78 | with open(output, 'a') as f: 79 | print("time: ", time2 - time1, file = f) 80 | print("------------------------", file = f) 81 | 82 | risks = extract_risk(results_eval_all[str(N)][str(obj_coef)]) 83 | with open(output, 'a') as f: 84 | print("risk:", N, file=f) 85 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 86 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 87 | 
print("------------------------", file = f) 88 | risk_all[str(N)][str(obj_coef)] = risks 89 | 90 | 91 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 92 | with open(output, 'a') as f: 93 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 94 | print(k,"frac feat. slt.:",v.mean(0), file = f) 95 | print("---", file = f) 96 | print("----------------------", file = f) 97 | print("----------------------", file = f) 98 | feature_split_all[str(N)][str(obj_coef)] = feature_split_freq 99 | 100 | pickle.dump(results_eval_all, open(direct + date + "results_eval_cvar_lognormal_objcoef.pkl", "wb")) 101 | pickle.dump(risk_all, open(direct + date + "risk_cvar_lognormal_objcoef.pkl", "wb")) 102 | pickle.dump(feature_split_all, open(direct + date + "feature_split_cvar_lognormal_objcoef.pkl", "wb")) 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /cvar/experiment_cvar_lognormal_oracle.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from cvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | 7 | p = 10 8 | n_jobs = 50 9 | runs = 50 10 | n_trees = 50; 11 | Nx_test = 200 12 | Ny_train = 1000 13 | Ny_test = 2000 14 | R = 0.1 15 | alpha = 0.1 16 | obj_coef_list = [0] 17 | N_list = [100, 200, 400] 18 | lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False 19 | seed = 0 20 | 21 | generate_Y = generate_Y_lognormal 22 | 23 | honesty = False; 24 | verbose = False; oracle = True; 25 | bootstrap = True; 26 | 27 | cond_mean = [lambda x: np.exp(x[:, 0])/5, lambda x: x[:, 0]/5, lambda x: np.abs(x[:, 0])/5] 28 | cond_std = [lambda x: 1 - 0.5*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 1 - 0.5*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 1 - 0.5*((1<=x[:, 1])&(x[:, 1]<=3))] 29 | 30 | risk_all = {} 31 | feature_split_all = {} 32 | results_eval_all = {} 33 | 34 | direct = '' 35 | date = '' 36 | 
output = "cvar_lognormal_oracle.txt" 37 | 38 | with open(output, 'w') as f: 39 | print("start", file = f) 40 | 41 | for N in N_list: 42 | risk_all[str(N)] = {} 43 | feature_split_all[str(N)] = {} 44 | results_eval_all[str(N)] = {} 45 | 46 | for obj_coef in obj_coef_list: 47 | 48 | n_proposals = N; 49 | mtry = p; 50 | subsample_ratio = 1; 51 | max_depth=100; 52 | min_leaf_size=10; 53 | balancedness_tol = 0.2; 54 | 55 | np.random.seed(seed) 56 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 57 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std, seed = seed) for run in range(runs)] 58 | 59 | with open(output, 'a') as f: 60 | print("N: ", N, file = f) 61 | print("obj_coef: ", obj_coef, file = f) 62 | 63 | time1 = time.time() 64 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest)(X_list[run], Y_list[run], X_list[run], Y_list[run], 65 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 66 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = min_leaf_size, 67 | verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap, seed = seed) for run in range(runs)) 68 | time2 = time.time() 69 | with open(output, 'a') as f: 70 | print("time: ", time2 - time1, file = f) 71 | print("------------------------", file = f) 72 | 73 | time1 = time.time() 74 | results_eval_all[str(N)][str(obj_coef)] = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run_determ_constr_full)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 75 | Nx_test,Ny_train, Ny_test, cond_mean, cond_std, 76 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 77 | verbose = False, generate_Y = generate_Y, seed = seed) for run in range(runs)) 78 | time2 = time.time() 79 | with 
open(output, 'a') as f: 80 | print("time: ", time2 - time1, file = f) 81 | print("------------------------", file = f) 82 | 83 | risks = extract_risk(results_eval_all[str(N)][str(obj_coef)]) 84 | with open(output, 'a') as f: 85 | print("risk:", N, file=f) 86 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 87 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 88 | print("------------------------", file = f) 89 | risk_all[str(N)][str(obj_coef)] = risks 90 | 91 | cond_sharpe = evaluate_cond_sharpe(results_eval_all[str(N)][str(obj_coef)], R = R) 92 | with open(output, 'a') as f: 93 | print("conditional sharpe ratio:", N, file=f) 94 | for k,v in sorted(cond_sharpe.items(), key = lambda x: x[1].mean()): 95 | print(k,"sharpe ratio:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 96 | print("------------------------", file = f) 97 | cond_sharpe_all[str(N)][str(obj_coef)] = cond_sharpe 98 | 99 | marginal_sharpe = evaluate_marginal_sharpe(results_eval_all[str(N)][str(obj_coef)], R = R) 100 | with open(output, 'a') as f: 101 | print("marginal sharpe ratio:", N, file=f) 102 | for k,v in sorted(marginal_sharpe.items(), key = lambda x: x[1].mean()): 103 | print(k,"sharpe ratio:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 104 | print("------------------------", file = f) 105 | marginal_sharpe_all[str(N)][str(obj_coef)] = marginal_sharpe 106 | 107 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 108 | with open(output, 'a') as f: 109 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 110 | print(k,"frac feat. 
slt.:",v.mean(0), file = f) 111 | print("---", file = f) 112 | print("----------------------", file = f) 113 | print("----------------------", file = f) 114 | feature_split_all[str(N)][str(obj_coef)] = feature_split_freq 115 | 116 | pickle.dump(results_eval_all, open(direct + date + "results_eval_cvar_lognormal_oracle.pkl", "wb")) 117 | pickle.dump(risk_all, open(direct + date + "risk_cvar_lognormal_oracle.pkl", "wb")) 118 | pickle.dump(feature_split_all, open(direct + date + "feature_split_cvar_lognormal_oracle.pkl", "wb")) 119 | 120 | 121 | -------------------------------------------------------------------------------- /cvar/experiment_cvar_normal.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from cvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | p = 10 7 | n_jobs = 50 8 | runs = 50 9 | n_trees = 500; 10 | Nx_test = 200 11 | Ny_train = 1000 12 | Ny_test = 2000 13 | R = 0.1 14 | alpha = 0.1 15 | obj_coef_list = [0] 16 | N_list = [100, 200, 400, 800] 17 | lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False 18 | 19 | 20 | generate_Y = generate_Y_normal 21 | 22 | honesty = False; 23 | verbose = False; oracle = False; 24 | bootstrap = True; 25 | 26 | cond_mean = [lambda x: np.exp(x[:, 0]), lambda x: x[:, 0], lambda x: np.abs(x[:, 0])] 27 | cond_std = [lambda x: 5 - 4*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 5 - 4*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 5 - 4*((1<=x[:, 1])&(x[:, 1]<=3))] 28 | 29 | risk_all = {} 30 | feature_split_all = {} 31 | results_eval_all = {} 32 | 33 | direct = '' 34 | date = '' 35 | output = "cvar_normal.txt" 36 | 37 | with open(output, 'w') as f: 38 | print("start", file = f) 39 | 40 | for N in N_list: 41 | risk_all[str(N)] = {} 42 | feature_split_all[str(N)] = {} 43 | results_eval_all[str(N)] = {} 44 | 45 | for obj_coef in obj_coef_list: 46 | 47 | n_proposals = N; 48 | mtry = p; 49 | subsample_ratio = 1; 50 | max_depth=100; 51 | 
min_leaf_size=10; 52 | balancedness_tol = 0.2; 53 | 54 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 55 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)] 56 | 57 | with open(output, 'a') as f: 58 | print("N: ", N, file = f) 59 | print("obj_coef: ", obj_coef, file = f) 60 | 61 | time1 = time.time() 62 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest)(X_list[run], Y_list[run], X_list[run], Y_list[run], 63 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 64 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = min_leaf_size, 65 | verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap) for run in range(runs)) 66 | time2 = time.time() 67 | with open(output, 'a') as f: 68 | print("time: ", time2 - time1, file = f) 69 | print("------------------------", file = f) 70 | 71 | time1 = time.time() 72 | results_eval_all[str(N)][str(obj_coef)] = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run_determ_constr)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 73 | Nx_test,Ny_train, Ny_test, cond_mean, cond_std, 74 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 75 | verbose = False, generate_Y = generate_Y) for run in range(runs)) 76 | time2 = time.time() 77 | with open(output, 'a') as f: 78 | print("time: ", time2 - time1, file = f) 79 | print("------------------------", file = f) 80 | 81 | risks = extract_risk(results_eval_all[str(N)][str(obj_coef)]) 82 | with open(output, 'a') as f: 83 | print("risk:", N, file=f) 84 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 85 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 86 | 
print("------------------------", file = f) 87 | risk_all[str(N)][str(obj_coef)] = risks 88 | 89 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 90 | with open(output, 'a') as f: 91 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 92 | print(k,"frac feat. slt.:",v.mean(0), file = f) 93 | print("---", file = f) 94 | print("----------------------", file = f) 95 | print("----------------------", file = f) 96 | feature_split_all[str(N)][str(obj_coef)] = feature_split_freq 97 | 98 | pickle.dump(results_eval_all, open(direct + date + "results_eval_cvar_normal.pkl", "wb")) 99 | pickle.dump(risk_all, open(direct + date + "risk_cvar_normal.pkl", "wb")) 100 | pickle.dump(feature_split_all, open(direct + date + "feature_split_cvar_normal.pkl", "wb")) 101 | 102 | 103 | -------------------------------------------------------------------------------- /cvar/experiment_cvar_normal_oracle.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from cvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | p = 10 7 | n_jobs = 50 8 | runs = 50 9 | n_trees = 50; 10 | Nx_test = 200 11 | Ny_train = 1000 12 | Ny_test = 2000 13 | R = 0.1 14 | alpha = 0.1 15 | obj_coef_list = [0] 16 | N_list = [100, 200, 400] 17 | lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False 18 | 19 | 20 | generate_Y = generate_Y_normal 21 | 22 | honesty = False; 23 | verbose = False; oracle = True; 24 | bootstrap = True; 25 | 26 | cond_mean = [lambda x: np.exp(x[:, 0]), lambda x: x[:, 0], lambda x: np.abs(x[:, 0])] 27 | cond_std = [lambda x: 5 - 4*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 5 - 4*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 5 - 4*((1<=x[:, 1])&(x[:, 1]<=3))] 28 | 29 | risk_all = {} 30 | feature_split_all = {} 31 | results_eval_all = {} 32 | 33 | direct = '' 34 | date = '' 35 | output = "cvar_normal_oracle.txt" 36 | 37 | with open(output, 'w') as f: 38 | 
print("start", file = f) 39 | 40 | for N in N_list: 41 | risk_all[str(N)] = {} 42 | feature_split_all[str(N)] = {} 43 | results_eval_all[str(N)] = {} 44 | 45 | for obj_coef in obj_coef_list: 46 | 47 | n_proposals = N; 48 | mtry = p; 49 | subsample_ratio = 1; 50 | max_depth=100; 51 | min_leaf_size=10; 52 | balancedness_tol = 0.2; 53 | 54 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 55 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)] 56 | 57 | with open(output, 'a') as f: 58 | print("N: ", N, file = f) 59 | print("obj_coef: ", obj_coef, file = f) 60 | 61 | time1 = time.time() 62 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest)(X_list[run], Y_list[run], X_list[run], Y_list[run], 63 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 64 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = min_leaf_size, 65 | verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap) for run in range(runs)) 66 | time2 = time.time() 67 | with open(output, 'a') as f: 68 | print("time: ", time2 - time1, file = f) 69 | print("------------------------", file = f) 70 | 71 | time1 = time.time() 72 | results_eval_all[str(N)][str(obj_coef)] = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run_determ_constr)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 73 | Nx_test,Ny_train, Ny_test, cond_mean, cond_std, 74 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 75 | verbose = False, generate_Y = generate_Y) for run in range(runs)) 76 | time2 = time.time() 77 | with open(output, 'a') as f: 78 | print("time: ", time2 - time1, file = f) 79 | print("------------------------", file = f) 80 | 81 | risks = 
extract_risk(results_eval_all[str(N)][str(obj_coef)]) 82 | with open(output, 'a') as f: 83 | print("risk:", N, file=f) 84 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 85 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 86 | print("------------------------", file = f) 87 | risk_all[str(N)][str(obj_coef)] = risks 88 | 89 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 90 | with open(output, 'a') as f: 91 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 92 | print(k,"frac feat. slt.:",v.mean(0), file = f) 93 | print("---", file = f) 94 | print("----------------------", file = f) 95 | print("----------------------", file = f) 96 | feature_split_all[str(N)][str(obj_coef)] = feature_split_freq 97 | 98 | pickle.dump(results_eval_all, open(direct + date + "results_eval_cvar_normal_oracle.pkl", "wb")) 99 | pickle.dump(risk_all, open(direct + date + "risk_cvar_normal_oracle.pkl", "wb")) 100 | pickle.dump(feature_split_all, open(direct + date + "feature_split_cvar_normal_oracle.pkl", "wb")) 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /cvar/feature_imp_cvar_lognormal.csv: -------------------------------------------------------------------------------- 1 | ,rf_approx_sol,rf_approx_sol_unconstr,rf_approx_risk,rf_approx_risk_unconstr,rf_random,rf_rf 2 | 0,0.12315448469182422,0.0,0.13937103312646298,0.25775769183297065,0.6730680109127384,1.0 3 | 1,1.0,0.9525460495691064,1.0,1.0,0.21006344086008255,0.44376381065949316 4 | 2,0.005172200788173924,1.0,0.019052875822791162,0.0,0.012496533356162446,0.0 5 | 3,5.4804903052885165e-06,0.9845098218512057,0.011018774905961459,0.023985180589547038,0.41570050267478015,0.014110636816765037 6 | 4,0.0015719018055580746,0.9989203559058301,0.018590555482632158,0.002757446819637922,0.1518121703084932,0.024328934016708194 7 | 
5,0.0,0.9648115352784055,0.0,0.026136125195939233,0.2569961063103058,0.012112609647180566 8 | 6,0.0009032224852255523,0.9713684552390044,0.006821394058760307,0.017144953713117406,0.0,0.0035666842619798762 9 | 7,0.004186464669080505,0.9765479621461677,0.016632745796581822,0.04062195760457596,0.25341185457208376,0.03300833426779803 10 | 8,0.0038826758454216827,0.9795271349503993,0.017251369125943324,0.012163720405945837,1.0,0.049345874149434955 11 | 9,0.004341955211300544,0.9798601899552372,0.020396541021515373,0.03671542381347663,0.2639491043798395,0.035170682590261446 12 | -------------------------------------------------------------------------------- /cvar/feature_imp_cvar_lognormal.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/feature_imp_cvar_lognormal.pkl -------------------------------------------------------------------------------- /cvar/feature_split_cvar_lognormal.csv: -------------------------------------------------------------------------------- 1 | ,rf_approx_sol,rf_approx_sol_unconstr,rf_approx_risk,rf_approx_risk_unconstr,rf_random,rf_rf,grf 2 | 0,0.13609773408029446,0.15079021539526716,0.11876149422627577,0.11694602825437826,0.10284549638000628,0.14867812849630596,0.11595331680546733 3 | 1,0.13643752619126184,0.09649616052617162,0.13330959833221714,0.11970660537757684,0.08724920135179577,0.11821783639441483,0.11947232539848128 4 | 2,0.09290406182834063,0.09305352985143253,0.09687185575450095,0.0978083814967909,0.07438970624367838,0.09027451752632688,0.09830622007664652 5 | 3,0.08946689391194262,0.0943790976272143,0.09182713029288475,0.09403101015038055,0.11319733230931192,0.09186452900768648,0.09371769010807725 6 | 4,0.09176851813454812,0.09274683469003948,0.09579464568472618,0.09607684804284194,0.088560765988745,0.09158909724338062,0.09648281419181476 7 | 
5,0.0886090407483402,0.09527779785599487,0.08961051419844748,0.09325044504506884,0.09663040382508614,0.09096751547161924,0.09333044396102838 8 | 6,0.08945527835373818,0.09395671713554851,0.09145073582241167,0.09323807847188448,0.07290804361075018,0.0904240843451098,0.0933762561199792 9 | 7,0.08970181137943603,0.09504211207557992,0.0921305079962646,0.09556415242701048,0.09851130355955126,0.09279144147632744,0.09546671224144568 10 | 8,0.09341268235065682,0.09456020032331444,0.09593909239266499,0.09693863140661381,0.167458978614625,0.09300609298000982,0.0973063679891608 11 | 9,0.09214645302145268,0.09369733451944295,0.0943044252996089,0.09643981932745312,0.09824876811644816,0.09218675705881657,0.09658785310789846 12 | -------------------------------------------------------------------------------- /cvar/feature_split_cvar_lognormal.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/feature_split_cvar_lognormal.pkl -------------------------------------------------------------------------------- /cvar/gurobi.log: -------------------------------------------------------------------------------- 1 | 2 | Gurobi 8.1.1 (mac64, Python) logging started Mon Oct 12 08:20:24 2020 3 | 4 | -------------------------------------------------------------------------------- /cvar/risk_cvar_lognormal.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/risk_cvar_lognormal.pkl -------------------------------------------------------------------------------- /cvar/risk_cvar_lognormal_honesty.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/risk_cvar_lognormal_honesty.pkl 
-------------------------------------------------------------------------------- /cvar/risk_cvar_lognormal_objcoef.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/risk_cvar_lognormal_objcoef.pkl -------------------------------------------------------------------------------- /cvar/risk_cvar_lognormal_oracle.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/risk_cvar_lognormal_oracle.pkl -------------------------------------------------------------------------------- /cvar/risk_cvar_lognormal_updatestep.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/risk_cvar_lognormal_updatestep.pkl -------------------------------------------------------------------------------- /cvar/risk_cvar_normal.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/risk_cvar_normal.pkl -------------------------------------------------------------------------------- /cvar/risk_cvar_normal_oracle.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/risk_cvar_normal_oracle.pkl -------------------------------------------------------------------------------- /cvar/speed_cvar.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | 
"output_type": "stream", 11 | "text": [ 12 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:504: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 13 | " if node.feature is -1:\n", 14 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:512: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 15 | " if node.feature is -1:\n", 16 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:529: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 17 | " if node.feature is -1:\n", 18 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:537: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 19 | " if node.feature is -1:\n", 20 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:547: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 21 | " if node.feature is -1:\n", 22 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:557: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 23 | " if node.feature is -1:\n", 24 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:504: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 25 | " if node.feature is -1:\n", 26 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:512: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 27 | " if node.feature is -1:\n", 28 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:529: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 29 | " if node.feature is -1:\n", 30 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:537: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 31 | " if node.feature is -1:\n", 32 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:547: SyntaxWarning: \"is\" with a literal. 
Did you mean \"==\"?\n", 33 | " if node.feature is -1:\n", 34 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/tree.py:557: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 35 | " if node.feature is -1:\n", 36 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/regression_tree.py:258: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 37 | " if node.feature is -1:\n", 38 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/regression_tree.py:266: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 39 | " if node.feature is -1:\n", 40 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/regression_tree.py:274: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 41 | " if node.feature is -1:\n", 42 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/regression_tree.py:284: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", 43 | " if node.feature is -1:\n", 44 | "/Users/maoxiaojie/Research/RF_SO/CSOForest/Plots_main_rev/cvar/regression_tree.py:294: SyntaxWarning: \"is\" with a literal. 
Did you mean \"==\"?\n", 45 | " if node.feature is -1:\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "%run tree.py\n", 51 | "%run cvar_tree_utilities.py\n", 52 | "%run regression_tree.py" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "N_list = [100, 200, 400]; p = 10\n", 62 | "R = 0.1; alpha = 0.1; obj_coef = 0\n", 63 | "lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False\n", 64 | "runs = 10\n", 65 | "\n", 66 | "generate_Y = generate_Y_lognormal\n", 67 | "cond_mean = [lambda x: np.exp(x[:, 0])/5, lambda x: x[:, 0]/5, lambda x: np.abs(x[:, 0])/5]\n", 68 | "cond_std = [lambda x: 1 - 0.5*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 1 - 0.5*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 1 - 0.5*((1<=x[:, 1])&(x[:, 1]<=3))]\n", 69 | "\n", 70 | "opt_solver = partial(solve_cvar, alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr)\n", 71 | "hessian_computer = partial(compute_hessian, alpha = alpha)\n", 72 | "active_constraint = partial(search_active_constraint, R = R, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr)\n", 73 | "gradient_computer = partial(compute_gradient, alpha = alpha, R = R, obj_coef = obj_coef)\n", 74 | "update_step = partial(compute_update_step, R = R)\n", 75 | "\n", 76 | "time_list = {str(N): {key: np.zeros(runs) for key in [\"rf_approx_risk\", \"rf_approx_sol\", \"rf_oracle\"]} for N in N_list}" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "for N in N_list:\n", 86 | " print(\"N:\", N)\n", 87 | " n_proposals = N; \n", 88 | " mtry = p;\n", 89 | " subsample_ratio = 1;\n", 90 | " max_depth=100; \n", 91 | " min_leaf_size=10; \n", 92 | " balancedness_tol = 0.2; \n", 93 | " honesty = False;\n", 94 | " verbose = False; oracle = True;\n", 95 | " bootstrap = True; \n", 96 | " \n", 97 | 
" X_list = [np.random.normal(size = (N, p)) for run in range(runs)]\n", 98 | " Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)]\n", 99 | "\n", 100 | " for run in range(runs):\n", 101 | " print(\"run:\", run)\n", 102 | " Y = Y_list[run]; Y_est = Y_list[run]\n", 103 | " X = X_list[run]; X_est = X_list[run]; \n", 104 | " \n", 105 | " time1 = time.time()\n", 106 | " rf_approx_risk = build_tree(Y, X, Y_est, X_est, \n", 107 | " opt_solver = opt_solver, hessian_computer = hessian_computer,\n", 108 | " gradient_computer = gradient_computer, \n", 109 | " search_active_constraint = active_constraint,\n", 110 | " compute_update_step = update_step,\n", 111 | " crit_computer = compute_crit_approx_risk, \n", 112 | " honesty = honesty, mtry = mtry,\n", 113 | " min_leaf_size = min_leaf_size, max_depth = max_depth, \n", 114 | " n_proposals = n_proposals, balancedness_tol = balancedness_tol,\n", 115 | " verbose = verbose)\n", 116 | " time2 = time.time()\n", 117 | " time_list[str(N)][\"rf_approx_risk\"][run] = time2 - time1\n", 118 | "\n", 119 | " time1 = time.time()\n", 120 | " rf_approx_sol = build_tree(Y, X, Y_est, X_est, \n", 121 | " opt_solver = opt_solver, hessian_computer = hessian_computer,\n", 122 | " gradient_computer = gradient_computer, \n", 123 | " search_active_constraint = active_constraint,\n", 124 | " compute_update_step = update_step,\n", 125 | " crit_computer = partial(compute_crit_approx_sol, obj_coef = obj_coef, alpha = alpha), \n", 126 | " honesty = honesty, mtry = mtry,\n", 127 | " min_leaf_size = min_leaf_size, max_depth = max_depth, \n", 128 | " n_proposals = n_proposals, balancedness_tol = balancedness_tol,\n", 129 | " verbose = verbose)\n", 130 | " time2 = time.time()\n", 131 | " time_list[str(N)][\"rf_approx_sol\"][run] = time2 - time1\n", 132 | "\n", 133 | " \n", 134 | " time1 = time.time()\n", 135 | " rf_oracle = build_tree(Y, X, Y_est, X_est, \n", 136 | " opt_solver = opt_solver, hessian_computer = hessian_computer,\n", 137 
| " gradient_computer = gradient_computer, \n", 138 | " search_active_constraint = active_constraint,\n", 139 | " compute_update_step = update_step,\n", 140 | " crit_computer = partial(compute_crit_oracle, solver = opt_solver), \n", 141 | " honesty = honesty, mtry = mtry,\n", 142 | " min_leaf_size = min_leaf_size, max_depth = max_depth, \n", 143 | " n_proposals = n_proposals, balancedness_tol = balancedness_tol,\n", 144 | " verbose = verbose)\n", 145 | " time2 = time.time()\n", 146 | " time_list[str(N)][\"rf_oracle\"][run] = time2 - time1" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 7, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "pickle.dump(time_list, open(\"time_cvar.pkl\", \"wb\"))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [] 164 | } 165 | ], 166 | "metadata": { 167 | "kernelspec": { 168 | "display_name": "Python 3", 169 | "language": "python", 170 | "name": "python3" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 3 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython3", 182 | "version": "3.8.8" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 4 187 | } 188 | -------------------------------------------------------------------------------- /cvar/time_cvar.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/cvar/time_cvar.pkl -------------------------------------------------------------------------------- /mean_var/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/.DS_Store -------------------------------------------------------------------------------- /mean_var/.ipynb_checkpoints/speed_meanvar-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%run tree.py\n", 10 | "%run meanvar_tree_utilities.py\n", 11 | "%run regression_tree.py" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "N_list = [100, 200, 400]; p = 10\n", 21 | "R = 0.1; alpha = 0.1; obj_coef = 0\n", 22 | "lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False\n", 23 | "runs = 10\n", 24 | "\n", 25 | "generate_Y = generate_Y_lognormal\n", 26 | "cond_mean = [lambda x: np.exp(x[:, 0])/5, lambda x: x[:, 0]/5, lambda x: np.abs(x[:, 0])/5]\n", 27 | "cond_std = [lambda x: 1 - 0.5*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 1 - 0.5*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 1 - 0.5*((1<=x[:, 1])&(x[:, 1]<=3))]\n", 28 | "\n", 29 | "opt_solver = partial(solve_mean_variance, alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr)\n", 30 | "hessian_computer = partial(compute_hessian, alpha = alpha)\n", 31 | "active_constraint = partial(search_active_constraint, R = R, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr)\n", 32 | "gradient_computer = partial(compute_gradient, alpha = alpha, R = R, obj_coef = obj_coef)\n", 33 | "update_step = partial(compute_update_step, R = R)\n", 34 | "\n", 35 | "time_list = {str(N): {key: np.zeros(runs) for key in [\"rf_approx_risk\", \"rf_approx_sol\", \"rf_oracle\"]} for N in N_list}" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | 
"metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "for N in N_list:\n", 45 | " print(\"N:\", N)\n", 46 | " n_proposals = N; \n", 47 | " mtry = p;\n", 48 | " subsample_ratio = 1;\n", 49 | " max_depth=100; \n", 50 | " min_leaf_size=10; \n", 51 | " balancedness_tol = 0.2; \n", 52 | " honesty = False;\n", 53 | " verbose = False; oracle = True;\n", 54 | " bootstrap = True; \n", 55 | " \n", 56 | " X_list = [np.random.normal(size = (N, p)) for run in range(runs)]\n", 57 | " Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)]\n", 58 | "\n", 59 | " for run in range(runs):\n", 60 | " print(\"run:\", run)\n", 61 | " Y = Y_list[run]; Y_est = Y_list[run]\n", 62 | " X = X_list[run]; X_est = X_list[run]; \n", 63 | " \n", 64 | " time1 = time.time()\n", 65 | " rf_approx_risk = build_tree(Y, X, Y_est, X_est, \n", 66 | " opt_solver = opt_solver, hessian_computer = hessian_computer,\n", 67 | " gradient_computer = gradient_computer, \n", 68 | " search_active_constraint = active_constraint,\n", 69 | " compute_update_step = update_step,\n", 70 | " crit_computer = compute_crit_approx_risk, \n", 71 | " honesty = honesty, mtry = mtry,\n", 72 | " min_leaf_size = min_leaf_size, max_depth = max_depth, \n", 73 | " n_proposals = n_proposals, balancedness_tol = balancedness_tol,\n", 74 | " verbose = verbose)\n", 75 | " time2 = time.time()\n", 76 | " time_list[str(N)][\"rf_approx_risk\"][run] = time2 - time1\n", 77 | "\n", 78 | " time1 = time.time()\n", 79 | " rf_approx_sol = build_tree(Y, X, Y_est, X_est, \n", 80 | " opt_solver = opt_solver, hessian_computer = hessian_computer,\n", 81 | " gradient_computer = gradient_computer, \n", 82 | " search_active_constraint = active_constraint,\n", 83 | " compute_update_step = update_step,\n", 84 | " crit_computer = partial(compute_crit_approx_sol, obj_coef = obj_coef, alpha = alpha), \n", 85 | " honesty = honesty, mtry = mtry,\n", 86 | " min_leaf_size = min_leaf_size, max_depth = max_depth, \n", 87 | " n_proposals = 
n_proposals, balancedness_tol = balancedness_tol,\n", 88 | " verbose = verbose)\n", 89 | " time2 = time.time()\n", 90 | " time_list[str(N)][\"rf_approx_sol\"][run] = time2 - time1\n", 91 | "\n", 92 | "\n", 93 | " time1 = time.time()\n", 94 | " rf_oracle = build_tree(Y, X, Y_est, X_est, \n", 95 | " opt_solver = opt_solver, hessian_computer = hessian_computer,\n", 96 | " gradient_computer = gradient_computer, \n", 97 | " search_active_constraint = active_constraint,\n", 98 | " compute_update_step = update_step,\n", 99 | " crit_computer = partial(compute_crit_oracle, solver = opt_solver), \n", 100 | " honesty = honesty, mtry = mtry,\n", 101 | " min_leaf_size = min_leaf_size, max_depth = max_depth, \n", 102 | " n_proposals = n_proposals, balancedness_tol = balancedness_tol,\n", 103 | " verbose = verbose)\n", 104 | " time2 = time.time()\n", 105 | " time_list[str(N)][\"rf_oracle\"][run] = time2 - time1\n", 106 | " " 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "pickle.dump(time_list, open(\"time_meanvar.pkl\", \"wb\"))" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [] 124 | } 125 | ], 126 | "metadata": { 127 | "kernelspec": { 128 | "display_name": "Python 3", 129 | "language": "python", 130 | "name": "python3" 131 | }, 132 | "language_info": { 133 | "codemirror_mode": { 134 | "name": "ipython", 135 | "version": 3 136 | }, 137 | "file_extension": ".py", 138 | "mimetype": "text/x-python", 139 | "name": "python", 140 | "nbconvert_exporter": "python", 141 | "pygments_lexer": "ipython3", 142 | "version": "3.8.8" 143 | } 144 | }, 145 | "nbformat": 4, 146 | "nbformat_minor": 4 147 | } 148 | -------------------------------------------------------------------------------- /mean_var/Plotting_meanvar.Rmd: -------------------------------------------------------------------------------- 
1 | --- 2 | title: "Plotting_meanvar" 3 | author: "Xiaojie Mao" 4 | date: "9/19/2020" 5 | output: html_document 6 | --- 7 | 8 | ```{r} 9 | library(tidyverse) 10 | library(latex2exp) 11 | ``` 12 | 13 | ```{r} 14 | # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/ 15 | colors = list("#D55E00", "#E69F00", "#56B4E9", "#009E73", "#CC79A7") 16 | names(colors) = c("StochOptForest\n(oracle)", "StochOptForest\n(apx-risk)", "StochOptForest\n(apx-soln)", "RandForest", "RandSplitForest") 17 | ``` 18 | 19 | # fig 4(a) 20 | ```{r} 21 | data = (read.csv("rel_risk_full.csv")) 22 | data = as_tibble(data) 23 | data$n = as.factor(data$n) 24 | data$Method = factor(data$Method, levels =c("StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest", "RandSplitForest")) 25 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 26 | ``` 27 | 28 | 29 | ```{r} 30 | plot_rel_risk = data %>% 31 | ggplot(aes(x = n, y = rel_risk, fill = Method, linetype = Constraint)) + 32 | geom_boxplot() + 33 | scale_fill_manual(values = 34 | c("StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 35 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 36 | "RandForest" = colors[["RandForest"]], 37 | "RandSplitForest" = colors[["RandSplitForest"]])) + ylab("Relative risk") + xlab(TeX("Sample size $n$")) + ylim(1, 5.5) + theme(legend.key.height = unit(0.8, "cm")) 38 | ``` 39 | 40 | ```{r} 41 | plot_rel_risk 42 | ``` 43 | 44 | 45 | ```{r} 46 | # ggsave("../../paper/fig/meanvar_risk.pdf", plot = plot_rel_risk, width = 6.5, height = 4.5) 47 | ``` 48 | 49 | 50 | ## fig 4(b) 51 | ```{r} 52 | data = (read.csv("feature_freq_full.csv")) 53 | data = as_tibble(data) 54 | data$Method = factor(data$Method, levels =c("StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest", "RandSplitForest")) 55 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 56 | data$p = factor(data$p) 57 | ``` 58 | 59 | ```{r} 60 | plot_feature_freq = 
data %>% ggplot(aes(x = p, y = freq, fill = Method, linetype = Constraint)) + geom_col(position='dodge', color = "black") + xlab("Covariate index") + ylab("Splitting frequency") + coord_flip() + 61 | scale_fill_manual(values = 62 | c("StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 63 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 64 | "RandForest" = colors[["RandForest"]], 65 | "RandSplitForest" = colors[["RandSplitForest"]]))+ theme(legend.position = "none") 66 | ``` 67 | 68 | ```{r} 69 | plot_feature_freq 70 | ``` 71 | 72 | ## fig 4(c) 73 | ```{r} 74 | data = (read.csv("cond_violation_full.csv")) 75 | data = as_tibble(data) 76 | data$Method = factor(data$Method, levels =c("StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest", "RandSplitForest")) 77 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 78 | data$n = as.factor(data$n) 79 | ``` 80 | 81 | ```{r} 82 | plot_cond_violation = data %>% 83 | ggplot(aes(x = n, y = violation, fill = Method, linetype = Constraint)) + 84 | geom_boxplot() + 85 | scale_fill_manual(values = 86 | c("StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 87 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 88 | "RandForest" = colors[["RandForest"]], 89 | "RandSplitForest" = colors[["RandSplitForest"]])) + ylab("Violation of Conditional Constraints") + xlab(TeX("Sample size $n$")) + theme(legend.position = "none") 90 | ``` 91 | 92 | ```{r} 93 | plot_cond_violation 94 | ``` 95 | 96 | ## fig 4(d) 97 | ```{r} 98 | data = (read.csv("marginal_violation_full.csv")) 99 | data = as_tibble(data) 100 | data$Method = factor(data$Method, levels =c("StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest", "RandSplitForest")) 101 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 102 | data$n = as.factor(data$n) 103 | ``` 104 | 105 | 106 | ```{r} 107 | plot_marg_violation = data %>% 108 | 
group_by(n, R, Method, Constraint) %>% summarize(violation_mean = mean(violation)) %>% 109 | ggplot(aes(x = n, y = violation_mean, fill = Method, linetype = Constraint)) + 110 | geom_col(position = "dodge", color = "black") + 111 | scale_fill_manual(values = 112 | c("StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 113 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 114 | "RandForest" = colors[["RandForest"]], 115 | "RandSplitForest" = colors[["RandSplitForest"]])) + ylab("Violation of Marginal Constraints") + xlab(TeX("Sample size $n$")) + theme(legend.position = "none") 116 | ``` 117 | 118 | 119 | ```{r} 120 | plot_marg_violation 121 | ``` 122 | 123 | # fig 9(a) 124 | ```{r} 125 | data = (read.csv("rel_risk_full_oracle.csv")) 126 | data = as_tibble(data) 127 | data$n = as.factor(data$n) 128 | data$Method = factor(data$Method, levels =c("StochOptForest\n(oracle)", "StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest", "RandSplitForest")) 129 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 130 | ``` 131 | 132 | ```{r} 133 | plot_rel_risk_oracle = data %>% 134 | ggplot(aes(x = n, y = rel_risk, fill = Method, linetype = Constraint)) + 135 | geom_boxplot() + 136 | scale_fill_manual(values = 137 | c("StochOptForest\n(oracle)" = colors[["StochOptForest\n(oracle)"]], 138 | "StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 139 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 140 | "RandForest" = colors[["RandForest"]], 141 | "RandSplitForest" = colors[["RandSplitForest"]])) + ylab("Relative risk") + xlab(TeX("Sample size $n$")) + theme(legend.key.height = unit(0.8, "cm")) + ylim(1, 5.5) 142 | ``` 143 | 144 | ```{r} 145 | plot_rel_risk_oracle 146 | ``` 147 | # fig 9(b) 148 | ```{r} 149 | data = (read.csv("feature_freq_full_oracle.csv")) 150 | data = as_tibble(data) 151 | data$Method = factor(data$Method, levels =c("StochOptForest\n(oracle)", 
"StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest", "RandSplitForest")) 152 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 153 | data$p = factor(data$p) 154 | ``` 155 | 156 | ```{r} 157 | plot_feature_freq_oracle = data %>% ggplot(aes(x = p, y = freq, fill = Method, linetype = Constraint)) + geom_col(position='dodge', color = "black") + xlab("Covariate index") + ylab("Splitting frequency") + coord_flip() + 158 | scale_fill_manual(values = 159 | c("StochOptForest\n(oracle)" = colors[["StochOptForest\n(oracle)"]], 160 | "StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 161 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 162 | "RandForest" = colors[["RandForest"]], 163 | "RandSplitForest" = colors[["RandSplitForest"]]))+ theme(legend.position = "none") 164 | ``` 165 | 166 | ```{r} 167 | plot_feature_freq_oracle 168 | ``` 169 | 170 | # fig 10(a) 171 | ```{r} 172 | data = (read.csv("rel_risk_full_R.csv")) 173 | data = as_tibble(data) 174 | data$R = as.factor(data$R) 175 | data$Method = factor(data$Method, levels =c("StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest", "RandSplitForest")) 176 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 177 | ``` 178 | 179 | ```{r} 180 | plot_rel_risk_R = data %>% 181 | ggplot(aes(x = R, y = rel_risk, fill = Method, linetype = Constraint)) + 182 | geom_boxplot() + 183 | scale_fill_manual(values = 184 | c("StochOptForest\n(oracle)" = colors[["StochOptForest\n(oracle)"]], 185 | "StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 186 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 187 | "RandForest" = colors[["RandForest"]], 188 | "RandSplitForest" = colors[["RandSplitForest"]])) + ylab("Relative risk") + xlab(TeX("Mean Return Threshold $R$")) + ylim(1, 7) + theme(legend.position = "none") 189 | ``` 190 | 191 | ```{r} 192 | plot_rel_risk_R 193 | ``` 194 | # fig 
10(b) 195 | ```{r} 196 | data = (read.csv("abs_risk_full.csv")) 197 | data = as_tibble(data) 198 | data$n = as.factor(data$n) 199 | data$Method = factor(data$Method, levels =c("StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest", "RandSplitForest")) 200 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 201 | ``` 202 | 203 | ```{r} 204 | plot_abs_risk = data %>% 205 | ggplot(aes(x = n, y = abs_risk, fill = Method, linetype = Constraint)) + 206 | geom_boxplot() + 207 | scale_fill_manual(values = 208 | c("StochOptForest\n(oracle)" = colors[["StochOptForest\n(oracle)"]], 209 | "StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 210 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 211 | "RandForest" = colors[["RandForest"]], 212 | "RandSplitForest" = colors[["RandSplitForest"]])) + ylab("Absolute risk") + xlab(TeX("Sample size $n$")) + ylim(0, 20) + theme(legend.position = "none") 213 | ``` 214 | 215 | ```{r} 216 | plot_abs_risk 217 | ``` 218 | -------------------------------------------------------------------------------- /mean_var/Plotting_var.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Plotting_var" 3 | author: "Xiaojie Mao" 4 | date: "9/19/2020" 5 | output: html_document 6 | --- 7 | 8 | ```{r} 9 | library(tidyverse) 10 | library(latex2exp) 11 | ``` 12 | 13 | ```{r} 14 | # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/ 15 | colors = list("#D55E00", "#E69F00", "#56B4E9", "#009E73", "#CC79A7") 16 | names(colors) = c("StochOptForest\n(oracle)", "StochOptForest\n(apx-risk)", "StochOptForest\n(apx-soln)", "RandForest", "RandSplitForest") 17 | ``` 18 | 19 | ## fig 8(a) 20 | ```{r} 21 | data = (read.csv("risk_var_normal_oracle.csv")) 22 | data = as_tibble(data) 23 | data$n = as.factor(data$n) 24 | data$Method = factor(data$Method, levels =c("StochOptForest\n(oracle)", "StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", 
"RandForest", "RandSplitForest")) 25 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 26 | ``` 27 | 28 | ```{r} 29 | plot_rel_risk_oracle = data %>% 30 | ggplot(aes(x = n, y = rel_risk, fill = Method, linetype = Constraint)) + 31 | geom_boxplot() + 32 | scale_fill_manual(values = 33 | c("StochOptForest\n(oracle)" = colors[["StochOptForest\n(oracle)"]], 34 | "StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 35 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 36 | "RandForest" = colors[["RandForest"]], 37 | "RandSplitForest" = colors[["RandSplitForest"]])) + ylab("Relative risk") + xlab(TeX("Sample size $n$")) + theme(legend.key.height = unit(0.8, "cm")) 38 | ``` 39 | 40 | ```{r} 41 | plot_rel_risk_oracle 42 | ``` 43 | ```{r} 44 | ggsave("../../paper/fig/var_risk_normal_oracle.pdf", plot = plot_rel_risk_oracle, width = 8, height = 4.5) 45 | ``` 46 | 47 | 48 | 49 | ```{r} 50 | data = (read.csv("risk_var_normal_oracle.csv")) 51 | data = as_tibble(data) 52 | data$n = as.factor(data$n) 53 | data$Method = factor(data$Method, levels =c("StochOptForest\n(oracle)", "StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest", "RandSplitForest")) 54 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 55 | ``` 56 | 57 | ```{r} 58 | plot_rel_risk_oracle = data %>% 59 | ggplot(aes(x = n, y = rel_risk, fill = Method, linetype = Constraint)) + 60 | geom_boxplot() + 61 | scale_fill_manual(values = 62 | c("StochOptForest\n(oracle)" = colors[["StochOptForest\n(oracle)"]], 63 | "StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 64 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 65 | "RandForest" = colors[["RandForest"]], 66 | "RandSplitForest" = colors[["RandSplitForest"]])) + ylab("Relative risk") + xlab(TeX("Sample size $n$")) + theme(legend.key.height = unit(0.8, "cm")) 67 | ``` 68 | 69 | ```{r} 70 | plot_rel_risk_oracle 71 | ``` 72 | ```{r} 73 
| ggsave("var_risk_normal_oracle.pdf", plot = plot_rel_risk_oracle, width = 10, height = 4.5) 74 | ``` 75 | 76 | 77 | 78 | ## fig 8(b) 79 | ```{r} 80 | data = (read.csv("feature_split_var_normal_oracle.csv")) 81 | data = as_tibble(data) 82 | data$Method = factor(data$Method, levels =c("StochOptForest\n(oracle)", "StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest")) 83 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 84 | data$p = factor(data$p) 85 | ``` 86 | 87 | ```{r} 88 | plot_feature_freq_oracle = data %>% ggplot(aes(x = p, y = freq, fill = Method, linetype = Constraint)) + geom_col(position='dodge', color = "black") + xlab("Covariate index") + ylab("Splitting frequency") + coord_flip() + 89 | scale_fill_manual(values = 90 | c("StochOptForest\n(oracle)" = colors[["StochOptForest\n(oracle)"]], 91 | "StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 92 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 93 | "RandForest" = colors[["RandForest"]])) + theme(legend.position = "none") 94 | # + theme(legend.key.height = unit(0.8, "cm")) 95 | ``` 96 | 97 | ```{r} 98 | plot_feature_freq_oracle 99 | ``` 100 | ```{r} 101 | ggsave("../../paper/fig/var_feature_normal_oracle.pdf", plot = plot_feature_freq_oracle, height = 4.5, width = 2) 102 | ``` 103 | 104 | ## fig 8(c) 105 | ```{r} 106 | data = (read.csv("risk_var_normal.csv")) 107 | data = as_tibble(data) 108 | data$n = as.factor(data$n) 109 | data$Method = factor(data$Method, levels =c("StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest", "RandSplitForest")) 110 | data$Constraint = factor(data$Constraint, levels = c("yes", "no")) 111 | ``` 112 | 113 | ```{r} 114 | plot_rel_risk_normal = data %>% 115 | ggplot(aes(x = n, y = rel_risk, fill = Method, linetype = Constraint)) + 116 | geom_boxplot() + 117 | scale_fill_manual(values = 118 | c("StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 119 | 
"StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 120 | "RandForest" = colors[["RandForest"]], 121 | "RandSplitForest" = colors[["RandSplitForest"]])) + ylab("Relative risk") + xlab(TeX("Sample size $n$")) + theme(legend.position = "none") 122 | # + theme(legend.key.height = unit(0.8, "cm")) 123 | ``` 124 | 125 | ```{r} 126 | plot_rel_risk_normal 127 | ``` 128 | ```{r} 129 | ggsave("../../paper/fig/var_risk_normal.pdf", plot = plot_rel_risk_normal, height = 4.5, width = 10) 130 | ``` 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /mean_var/__pycache__/tree.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/__pycache__/tree.cpython-36.pyc -------------------------------------------------------------------------------- /mean_var/__pycache__/tree.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/__pycache__/tree.cpython-38.pyc -------------------------------------------------------------------------------- /mean_var/cond_violation_meanvar_normal_stoch.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/cond_violation_meanvar_normal_stoch.pkl -------------------------------------------------------------------------------- /mean_var/experiment_meanvar_stoch.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from meanvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | # forest2 7 | 8 | p = 10 9 | n_jobs = 50 10 | runs = 50 11 | n_trees = 500; 12 | Nx_test = 200 13 | 
Ny_train = 1000 14 | Ny_test = 2000 15 | R = 0.1 16 | alpha = 0.1 17 | obj_coef = 0 18 | R_list = [0.1] 19 | N_list = [100, 200, 400, 800] 20 | lb = -GRB.INFINITY; ub = GRB.INFINITY; sum_bound = 1; if_stoch_constr = True 21 | 22 | 23 | generate_Y = generate_Y_normal 24 | 25 | honesty = False; 26 | verbose = False; oracle = False; 27 | bootstrap = True; 28 | 29 | cond_mean = [lambda x: np.exp(x[:, 0]), lambda x: x[:, 0], lambda x: np.abs(x[:, 0])] 30 | cond_std = [lambda x: 5 - 4*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 5 - 4*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 5 - 4*((1<=x[:, 1])&(x[:, 1]<=3))] 31 | 32 | rel_risk_all = {} 33 | risk_all = {} 34 | cond_violation_all = {} 35 | marginal_violation_all = {} 36 | feature_split_all = {} 37 | results_eval_all = {} 38 | 39 | direct = '' 40 | date = '' 41 | output = "meanvar_normal_stoch.txt" 42 | 43 | with open(output, 'w') as f: 44 | print("start", file = f) 45 | 46 | for N in N_list: 47 | rel_risk_all[str(N)] = {} 48 | risk_all[str(N)] = {} 49 | cond_violation_all[str(N)] = {} 50 | marginal_violation_all[str(N)] = {} 51 | feature_split_all[str(N)] = {} 52 | results_eval_all[str(N)] = {} 53 | 54 | for R in R_list: 55 | 56 | n_proposals = N; 57 | mtry = p; 58 | subsample_ratio = 1; 59 | max_depth=100; 60 | min_leaf_size=10; 61 | balancedness_tol = 0.2; 62 | 63 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 64 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)] 65 | 66 | with open(output, 'a') as f: 67 | print("N: ", N, file = f) 68 | print("R: ", R, file = f) 69 | 70 | time1 = time.time() 71 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest)(X_list[run], Y_list[run], X_list[run], Y_list[run], 72 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 73 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = 
min_leaf_size, 74 | verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap) for run in range(runs)) 75 | time2 = time.time() 76 | with open(output, 'a') as f: 77 | print("time: ", time2 - time1, file = f) 78 | print("------------------------", file = f) 79 | 80 | time1 = time.time() 81 | results_eval_all[str(N)][str(R)] = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run_stoch_constr)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 82 | Nx_test,Ny_train, Ny_test, cond_mean, cond_std, 83 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 84 | verbose = False, generate_Y = generate_Y) for run in range(runs)) 85 | time2 = time.time() 86 | with open(output, 'a') as f: 87 | print("time: ", time2 - time1, file = f) 88 | print("------------------------", file = f) 89 | 90 | risks = extract_risk(results_eval_all[str(N)][str(R)]) 91 | with open(output, 'a') as f: 92 | print("risk:", N, file=f) 93 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 94 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 95 | print("------------------------", file = f) 96 | risk_all[str(N)][str(R)] = risks 97 | 98 | rel_risks = extract_rel_risk(risks, results_fit) 99 | with open(output, 'a') as f: 100 | print("rel_risk:", N, file=f) 101 | for k,v in sorted(rel_risks.items(), key = lambda x: x[1].mean()): 102 | print(k,"rel risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 103 | print("------------------------", file = f) 104 | rel_risk_all[str(N)][str(R)] = rel_risks 105 | 106 | violation = evaluate_cond_violation(results_eval_all[str(N)][str(R)]) 107 | with open(output, 'a') as f: 108 | print("conditional violation:", N, file=f) 109 | for k,v in sorted(violation.items(), key = lambda x: x[1].mean()): 110 | print(k,"avg conditional violation:",np.mean(v),"+-", 
2*np.std(v)/np.sqrt(len(v)), file=f) 111 | print("------------------------", file = f) 112 | cond_violation_all[str(N)][str(R)] = violation 113 | 114 | violation = evaluate_mean_violation(results_eval_all[str(N)][str(R)]) 115 | with open(output, 'a') as f: 116 | print("mean violation:", N, file=f) 117 | for k,v in sorted(violation.items(), key = lambda x: x[1].mean()): 118 | print(k,"avg mean violation:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 119 | print("------------------------", file = f) 120 | marginal_violation_all[str(N)][str(R)] = violation 121 | 122 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 123 | with open(output, 'a') as f: 124 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 125 | print(k,"frac feat. slt.:",v.mean(0), file = f) 126 | print("---", file = f) 127 | print("----------------------", file = f) 128 | print("----------------------", file = f) 129 | feature_split_all[str(N)][str(R)] = feature_split_freq 130 | 131 | pickle.dump(results_eval_all, open(direct + date + "results_eval_meanvar_normal_stoch.pkl", "wb")) 132 | pickle.dump(risk_all, open(direct + date + "risk_meanvar_normal_stoch.pkl", "wb")) 133 | pickle.dump(rel_risk_all, open(direct + date + "rel_risk_meanvar_normal_stoch.pkl", "wb")) 134 | pickle.dump(feature_split_all, open(direct + date + "feature_split_meanvar_normal_stoch.pkl", "wb")) 135 | pickle.dump(marginal_violation_all, open(direct + date + "mean_violation_meanvar_normal_stoch.pkl", "wb")) 136 | pickle.dump(cond_violation_all, open(direct + date + "cond_violation_meanvar_normal_stoch.pkl", "wb")) 137 | 138 | 139 | -------------------------------------------------------------------------------- /mean_var/experiment_meanvar_stoch_R.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from meanvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | # forest2 7 | 8 
| p = 10 9 | n_jobs = 50 10 | runs = 50 11 | n_trees = 500; 12 | Nx_test = 200 13 | Ny_train = 1000 14 | Ny_test = 2000 15 | R = 0.1 16 | alpha = 0.1 17 | obj_coef = 0 18 | R_list = [0.1, 0.3, 0.5] 19 | N_list = [400] 20 | lb = -GRB.INFINITY; ub = GRB.INFINITY; sum_bound = 1; if_stoch_constr = True 21 | 22 | 23 | generate_Y = generate_Y_normal 24 | 25 | honesty = False; 26 | verbose = False; oracle = False; 27 | bootstrap = True; 28 | 29 | cond_mean = [lambda x: np.exp(x[:, 0]), lambda x: x[:, 0], lambda x: np.abs(x[:, 0])] 30 | cond_std = [lambda x: 5 - 4*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 5 - 4*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 5 - 4*((1<=x[:, 1])&(x[:, 1]<=3))] 31 | 32 | rel_risk_all = {} 33 | risk_all = {} 34 | cond_violation_all = {} 35 | marginal_violation_all = {} 36 | feature_split_all = {} 37 | results_eval_all = {} 38 | 39 | direct = '' 40 | date = '' 41 | output = "meanvar_normal_stoch_R.txt" 42 | 43 | with open(output, 'w') as f: 44 | print("start", file = f) 45 | 46 | for N in N_list: 47 | rel_risk_all[str(N)] = {} 48 | risk_all[str(N)] = {} 49 | cond_violation_all[str(N)] = {} 50 | marginal_violation_all[str(N)] = {} 51 | feature_split_all[str(N)] = {} 52 | results_eval_all[str(N)] = {} 53 | 54 | for R in R_list: 55 | 56 | n_proposals = N; 57 | mtry = p; 58 | subsample_ratio = 1; 59 | max_depth=100; 60 | min_leaf_size=10; 61 | balancedness_tol = 0.2; 62 | 63 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 64 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)] 65 | 66 | with open(output, 'a') as f: 67 | print("N: ", N, file = f) 68 | print("R: ", R, file = f) 69 | 70 | time1 = time.time() 71 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest)(X_list[run], Y_list[run], X_list[run], Y_list[run], 72 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 73 | n_trees = n_trees, honesty= honesty, mtry = 
mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = min_leaf_size, 74 | verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap) for run in range(runs)) 75 | time2 = time.time() 76 | with open(output, 'a') as f: 77 | print("time: ", time2 - time1, file = f) 78 | print("------------------------", file = f) 79 | 80 | time1 = time.time() 81 | results_eval_all[str(N)][str(R)] = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run_stoch_constr)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 82 | Nx_test,Ny_train, Ny_test, cond_mean, cond_std, 83 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 84 | verbose = False, generate_Y = generate_Y) for run in range(runs)) 85 | time2 = time.time() 86 | with open(output, 'a') as f: 87 | print("time: ", time2 - time1, file = f) 88 | print("------------------------", file = f) 89 | 90 | risks = extract_risk(results_eval_all[str(N)][str(R)]) 91 | with open(output, 'a') as f: 92 | print("risk:", N, file=f) 93 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 94 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 95 | print("------------------------", file = f) 96 | risk_all[str(N)][str(R)] = risks 97 | 98 | rel_risks = extract_rel_risk(risks, results_fit) 99 | with open(output, 'a') as f: 100 | print("rel_risk:", N, file=f) 101 | for k,v in sorted(rel_risks.items(), key = lambda x: x[1].mean()): 102 | print(k,"rel risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 103 | print("------------------------", file = f) 104 | rel_risk_all[str(N)][str(R)] = rel_risks 105 | 106 | violation = evaluate_cond_violation(results_eval_all[str(N)][str(R)]) 107 | with open(output, 'a') as f: 108 | print("conditional violation:", N, file=f) 109 | for k,v in sorted(violation.items(), key = lambda x: 
x[1].mean()): 110 | print(k,"avg conditional violation:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 111 | print("------------------------", file = f) 112 | cond_violation_all[str(N)][str(R)] = violation 113 | 114 | violation = evaluate_mean_violation(results_eval_all[str(N)][str(R)]) 115 | with open(output, 'a') as f: 116 | print("mean violation:", N, file=f) 117 | for k,v in sorted(violation.items(), key = lambda x: x[1].mean()): 118 | print(k,"avg mean violation:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 119 | print("------------------------", file = f) 120 | marginal_violation_all[str(N)][str(R)] = violation 121 | 122 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 123 | with open(output, 'a') as f: 124 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 125 | print(k,"frac feat. slt.:",v.mean(0), file = f) 126 | print("---", file = f) 127 | print("----------------------", file = f) 128 | print("----------------------", file = f) 129 | feature_split_all[str(N)][str(R)] = feature_split_freq 130 | 131 | pickle.dump(results_eval_all, open(direct + date + "results_eval_meanvar_normal_stoch_R.pkl", "wb")) 132 | pickle.dump(risk_all, open(direct + date + "risk_meanvar_normal_stoch_R.pkl", "wb")) 133 | pickle.dump(rel_risk_all, open(direct + date + "rel_risk_meanvar_normal_stoch_R.pkl", "wb")) 134 | pickle.dump(feature_split_all, open(direct + date + "feature_split_meanvar_normal_stoch_R.pkl", "wb")) 135 | pickle.dump(marginal_violation_all, open(direct + date + "mean_violation_meanvar_normal_stoch_R.pkl", "wb")) 136 | pickle.dump(cond_violation_all, open(direct + date + "cond_violation_meanvar_normal_stoch_R.pkl", "wb")) 137 | 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /mean_var/experiment_meanvar_stoch_oracle.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | 
from meanvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | # forest2 7 | 8 | p = 10 9 | n_jobs = 50 10 | runs = 50 11 | n_trees = 50; 12 | Nx_test = 200 13 | Ny_train = 1000 14 | Ny_test = 2000 15 | R = 0.1 16 | alpha = 0.1 17 | obj_coef = 0 18 | R_list = [0.1] 19 | N_list = [100, 200, 400] 20 | lb = -GRB.INFINITY; ub = GRB.INFINITY; sum_bound = 1; if_stoch_constr = True 21 | 22 | 23 | 24 | generate_Y = generate_Y_normal 25 | 26 | honesty = False; 27 | verbose = False; oracle = True; 28 | bootstrap = True; 29 | 30 | cond_mean = [lambda x: np.exp(x[:, 0]), lambda x: x[:, 0], lambda x: np.abs(x[:, 0])] 31 | cond_std = [lambda x: 5 - 4*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 5 - 4*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 5 - 4*((1<=x[:, 1])&(x[:, 1]<=3))] 32 | 33 | rel_risk_all = {} 34 | risk_all = {} 35 | cond_violation_all = {} 36 | marginal_violation_all = {} 37 | feature_split_all = {} 38 | results_eval_all = {} 39 | 40 | direct = '' 41 | date = '' 42 | output = "meanvar_normal_stoch_oracle.txt" 43 | 44 | with open(output, 'w') as f: 45 | print("start", file = f) 46 | 47 | for N in N_list: 48 | rel_risk_all[str(N)] = {} 49 | risk_all[str(N)] = {} 50 | cond_violation_all[str(N)] = {} 51 | marginal_violation_all[str(N)] = {} 52 | feature_split_all[str(N)] = {} 53 | results_eval_all[str(N)] = {} 54 | 55 | for R in R_list: 56 | 57 | n_proposals = N; 58 | mtry = p; 59 | subsample_ratio = 1; 60 | max_depth=100; 61 | min_leaf_size=10; 62 | balancedness_tol = 0.2; 63 | 64 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 65 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)] 66 | 67 | with open(output, 'a') as f: 68 | print("N: ", N, file = f) 69 | print("R: ", R, file = f) 70 | 71 | time1 = time.time() 72 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest)(X_list[run], Y_list[run], X_list[run], Y_list[run], 73 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = 
ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 74 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = min_leaf_size, 75 | verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap) for run in range(runs)) 76 | time2 = time.time() 77 | with open(output, 'a') as f: 78 | print("time: ", time2 - time1, file = f) 79 | print("------------------------", file = f) 80 | 81 | time1 = time.time() 82 | results_eval_all[str(N)][str(R)] = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run_stoch_constr)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 83 | Nx_test,Ny_train, Ny_test, cond_mean, cond_std, 84 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 85 | verbose = False, generate_Y = generate_Y) for run in range(runs)) 86 | time2 = time.time() 87 | with open(output, 'a') as f: 88 | print("time: ", time2 - time1, file = f) 89 | print("------------------------", file = f) 90 | 91 | risks = extract_risk(results_eval_all[str(N)][str(R)]) 92 | with open(output, 'a') as f: 93 | print("risk:", N, file=f) 94 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 95 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 96 | print("------------------------", file = f) 97 | risk_all[str(N)][str(R)] = risks 98 | 99 | rel_risks = extract_rel_risk(risks, results_fit) 100 | with open(output, 'a') as f: 101 | print("rel_risk:", N, file=f) 102 | for k,v in sorted(rel_risks.items(), key = lambda x: x[1].mean()): 103 | print(k,"rel risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 104 | print("------------------------", file = f) 105 | rel_risk_all[str(N)][str(R)] = rel_risks 106 | 107 | violation = evaluate_cond_violation(results_eval_all[str(N)][str(R)]) 108 | with open(output, 'a') as f: 109 | 
print("conditional violation:", N, file=f) 110 | for k,v in sorted(violation.items(), key = lambda x: x[1].mean()): 111 | print(k,"avg conditional violation:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 112 | print("------------------------", file = f) 113 | cond_violation_all[str(N)][str(R)] = violation 114 | 115 | violation = evaluate_mean_violation(results_eval_all[str(N)][str(R)]) 116 | with open(output, 'a') as f: 117 | print("mean violation:", N, file=f) 118 | for k,v in sorted(violation.items(), key = lambda x: x[1].mean()): 119 | print(k,"avg mean violation:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 120 | print("------------------------", file = f) 121 | marginal_violation_all[str(N)][str(R)] = violation 122 | 123 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 124 | with open(output, 'a') as f: 125 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 126 | print(k,"frac feat. slt.:",v.mean(0), file = f) 127 | print("---", file = f) 128 | print("----------------------", file = f) 129 | print("----------------------", file = f) 130 | feature_split_all[str(N)][str(R)] = feature_split_freq 131 | 132 | pickle.dump(results_eval_all, open(direct + date + "results_eval_meanvar_normal_stoch_oracle.pkl", "wb")) 133 | pickle.dump(risk_all, open(direct + date + "risk_meanvar_normal_stoch_oracle.pkl", "wb")) 134 | pickle.dump(rel_risk_all, open(direct + date + "rel_risk_meanvar_normal_stoch_oracle.pkl", "wb")) 135 | pickle.dump(feature_split_all, open(direct + date + "feature_split_meanvar_normal_stoch_oracle.pkl", "wb")) 136 | pickle.dump(marginal_violation_all, open(direct + date + "mean_violation_meanvar_normal_stoch_oracle.pkl", "wb")) 137 | pickle.dump(cond_violation_all, open(direct + date + "cond_violation_meanvar_normal_stoch_oracle.pkl", "wb")) 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- 
/mean_var/experiment_var_normal.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from meanvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | 7 | p = 10 8 | n_jobs = 50 9 | runs = 50 10 | n_trees = 500; 11 | Nx_test = 200 12 | Ny_train = 1000 13 | Ny_test = 2000 14 | R = 0.1 15 | alpha = 0.1 16 | obj_coef_list = [0] 17 | N_list = [100, 200, 400, 800] 18 | lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False 19 | 20 | 21 | 22 | generate_Y = generate_Y_normal 23 | 24 | honesty = False; 25 | verbose = False; oracle = False; 26 | bootstrap = True; 27 | 28 | 29 | cond_mean = [lambda x: np.exp(x[:, 0]), lambda x: x[:, 0], lambda x: np.abs(x[:, 0])] 30 | cond_std = [lambda x: 5 - 4*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 5 - 4*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 5 - 4*((1<=x[:, 1])&(x[:, 1]<=3))] 31 | 32 | risk_all = {} 33 | feature_split_all = {} 34 | results_eval_all = {} 35 | 36 | direct = '' 37 | date = '' 38 | output = "var_normal.txt" 39 | 40 | with open(output, 'w') as f: 41 | print("start", file = f) 42 | 43 | for N in N_list: 44 | risk_all[str(N)] = {} 45 | feature_split_all[str(N)] = {} 46 | results_eval_all[str(N)] = {} 47 | 48 | for obj_coef in obj_coef_list: 49 | 50 | n_proposals = N; 51 | mtry = p; 52 | subsample_ratio = 1; 53 | max_depth=100; 54 | min_leaf_size=10; 55 | balancedness_tol = 0.2; 56 | 57 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 58 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)] 59 | 60 | with open(output, 'a') as f: 61 | print("N: ", N, file = f) 62 | print("obj_coef: ", obj_coef, file = f) 63 | 64 | time1 = time.time() 65 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest)(X_list[run], Y_list[run], X_list[run], Y_list[run], 66 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 67 | n_trees = 
n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = min_leaf_size, 68 | verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap) for run in range(runs)) 69 | time2 = time.time() 70 | with open(output, 'a') as f: 71 | print("time: ", time2 - time1, file = f) 72 | print("------------------------", file = f) 73 | 74 | time1 = time.time() 75 | results_eval_all[str(N)][str(obj_coef)] = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run_determ_constr)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 76 | Nx_test,Ny_train, Ny_test, cond_mean, cond_std, 77 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 78 | verbose = False, generate_Y = generate_Y) for run in range(runs)) 79 | time2 = time.time() 80 | with open(output, 'a') as f: 81 | print("time: ", time2 - time1, file = f) 82 | print("------------------------", file = f) 83 | 84 | risks = extract_risk(results_eval_all[str(N)][str(obj_coef)]) 85 | with open(output, 'a') as f: 86 | print("risk:", N, file=f) 87 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 88 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 89 | print("------------------------", file = f) 90 | risk_all[str(N)][str(obj_coef)] = risks 91 | 92 | 93 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 94 | with open(output, 'a') as f: 95 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 96 | print(k,"frac feat. 
slt.:",v.mean(0), file = f) 97 | print("---", file = f) 98 | print("----------------------", file = f) 99 | print("----------------------", file = f) 100 | feature_split_all[str(N)][str(obj_coef)] = feature_split_freq 101 | 102 | pickle.dump(results_eval_all, open(direct + date + "results_eval_var_normal.pkl", "wb")) 103 | pickle.dump(risk_all, open(direct + date + "risk_var_normal.pkl", "wb")) 104 | pickle.dump(feature_split_all, open(direct + date + "feature_split_var_normal.pkl", "wb")) 105 | 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /mean_var/experiment_var_normal_oracle.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from meanvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | # forest2 7 | 8 | p = 10 9 | n_jobs = 50 10 | runs = 50 11 | n_trees = 50; 12 | Nx_test = 200 13 | Ny_train = 1000 14 | Ny_test = 2000 15 | R = 0.1 16 | alpha = 0.1 17 | obj_coef_list = [0] 18 | N_list = [100, 200, 400] 19 | lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False 20 | 21 | # p = 10 22 | # n_jobs = 25 23 | # runs = 50 24 | # n_trees = 50; 25 | # Nx_test = 30 26 | # Ny_train = 1000 27 | # Ny_test = 2000 28 | # R = 0.1 29 | # alpha = 0.1 30 | # obj_coef_list = [0] 31 | # lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False 32 | # N_list = [100, 200, 400] 33 | 34 | 35 | generate_Y = generate_Y_normal 36 | 37 | honesty = False; 38 | verbose = False; oracle = True; 39 | bootstrap = True; 40 | 41 | 42 | cond_mean = [lambda x: np.exp(x[:, 0]), lambda x: x[:, 0], lambda x: np.abs(x[:, 0])] 43 | cond_std = [lambda x: 5 - 4*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 5 - 4*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 5 - 4*((1<=x[:, 1])&(x[:, 1]<=3))] 44 | 45 | # rel_risk_all = {} 46 | risk_all = {} 47 | cond_sharpe_all = {} 48 | marginal_sharpe_all = {} 49 | feature_split_all = {} 50 | results_eval_all = {} 51 | # results_fit_all = {} 
52 | 53 | direct = '/data/xm77/forest/' 54 | date = '09_03/' 55 | output = "var_normal_oracle.txt" 56 | 57 | with open(output, 'w') as f: 58 | print("start", file = f) 59 | 60 | for N in N_list: 61 | # rel_risk_all[str(N)] = {} 62 | # results_fit_all[str(N)] = {} 63 | risk_all[str(N)] = {} 64 | cond_sharpe_all[str(N)] = {} 65 | marginal_sharpe_all[str(N)] = {} 66 | feature_split_all[str(N)] = {} 67 | results_eval_all[str(N)] = {} 68 | 69 | for obj_coef in obj_coef_list: 70 | 71 | n_proposals = N; 72 | mtry = p; 73 | subsample_ratio = 1; 74 | max_depth=100; 75 | min_leaf_size=10; 76 | balancedness_tol = 0.2; 77 | 78 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 79 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)] 80 | 81 | with open(output, 'a') as f: 82 | print("N: ", N, file = f) 83 | print("obj_coef: ", obj_coef, file = f) 84 | 85 | time1 = time.time() 86 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest)(X_list[run], Y_list[run], X_list[run], Y_list[run], 87 | alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 88 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = min_leaf_size, 89 | verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap) for run in range(runs)) 90 | time2 = time.time() 91 | # results_fit_all[str(N)][str(obj_coef)] = results_fit 92 | with open(output, 'a') as f: 93 | print("time: ", time2 - time1, file = f) 94 | print("------------------------", file = f) 95 | 96 | time1 = time.time() 97 | results_eval_all[str(N)][str(obj_coef)] = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run_determ_constr)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 98 | Nx_test,Ny_train, Ny_test, cond_mean, cond_std, 99 | alpha = alpha, R = R, obj_coef 
= obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr, 100 | verbose = False, generate_Y = generate_Y) for run in range(runs)) 101 | time2 = time.time() 102 | with open(output, 'a') as f: 103 | print("time: ", time2 - time1, file = f) 104 | print("------------------------", file = f) 105 | 106 | risks = extract_risk(results_eval_all[str(N)][str(obj_coef)]) 107 | with open(output, 'a') as f: 108 | print("risk:", N, file=f) 109 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 110 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 111 | print("------------------------", file = f) 112 | risk_all[str(N)][str(obj_coef)] = risks 113 | 114 | cond_sharpe = evaluate_cond_sharpe(results_eval_all[str(N)][str(obj_coef)], R = R) 115 | with open(output, 'a') as f: 116 | print("conditional sharpe ratio:", N, file=f) 117 | for k,v in sorted(cond_sharpe.items(), key = lambda x: x[1].mean()): 118 | print(k,"sharpe ratio:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 119 | print("------------------------", file = f) 120 | cond_sharpe_all[str(N)][str(obj_coef)] = cond_sharpe 121 | 122 | marginal_sharpe = evaluate_marginal_sharpe(results_eval_all[str(N)][str(obj_coef)], R = R) 123 | with open(output, 'a') as f: 124 | print("marginal sharpe ratio:", N, file=f) 125 | for k,v in sorted(marginal_sharpe.items(), key = lambda x: x[1].mean()): 126 | print(k,"sharpe ratio:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 127 | print("------------------------", file = f) 128 | marginal_sharpe_all[str(N)][str(obj_coef)] = marginal_sharpe 129 | 130 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 131 | with open(output, 'a') as f: 132 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 133 | print(k,"frac feat. 
slt.:",v.mean(0), file = f) 134 | print("---", file = f) 135 | print("----------------------", file = f) 136 | print("----------------------", file = f) 137 | feature_split_all[str(N)][str(obj_coef)] = feature_split_freq 138 | 139 | pickle.dump(results_eval_all, open(direct + date + "results_eval_var_normal_oracle.pkl", "wb")) 140 | pickle.dump(risk_all, open(direct + date + "risk_var_normal_oracle.pkl", "wb")) 141 | pickle.dump(feature_split_all, open(direct + date + "feature_split_var_normal_oracle.pkl", "wb")) 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /mean_var/feature_freq_full.csv: -------------------------------------------------------------------------------- 1 | Method,p,Constraint,freq 2 | "StochOptForest 3 | (apx-soln)",1,yes,0.10682983716275388 4 | "StochOptForest 5 | (apx-soln)",2,yes,0.16833149321800822 6 | "StochOptForest 7 | (apx-soln)",3,yes,0.09107013959545414 8 | "StochOptForest 9 | (apx-soln)",4,yes,0.09096189616598718 10 | "StochOptForest 11 | (apx-soln)",5,yes,0.09034186459637113 12 | "StochOptForest 13 | (apx-soln)",6,yes,0.09041615378284211 14 | "StochOptForest 15 | (apx-soln)",7,yes,0.0907651785191277 16 | "StochOptForest 17 | (apx-soln)",8,yes,0.09057631544043687 18 | "StochOptForest 19 | (apx-soln)",9,yes,0.09094629823850262 20 | "StochOptForest 21 | (apx-soln)",10,yes,0.08976082328051949 22 | "StochOptForest 23 | (apx-soln)",1,no,0.055315449618791895 24 | "StochOptForest 25 | (apx-soln)",2,no,0.06940419692893968 26 | "StochOptForest 27 | (apx-soln)",3,no,0.10911167564955929 28 | "StochOptForest 29 | (apx-soln)",4,no,0.1098287447462709 30 | "StochOptForest 31 | (apx-soln)",5,no,0.10929627955650645 32 | "StochOptForest 33 | (apx-soln)",6,no,0.11086125271389977 34 | "StochOptForest 35 | (apx-soln)",7,no,0.10936868641764659 36 | "StochOptForest 37 | (apx-soln)",8,no,0.10961303105217812 38 | "StochOptForest 39 | (apx-soln)",9,no,0.10820730263423826 40 | "StochOptForest 41 | 
(apx-soln)",10,no,0.10899338068197344 42 | "StochOptForest 43 | (apx-risk)",1,yes,0.18387232596147432 44 | "StochOptForest 45 | (apx-risk)",2,yes,0.16453572213032291 46 | "StochOptForest 47 | (apx-risk)",3,yes,0.08196696135744837 48 | "StochOptForest 49 | (apx-risk)",4,yes,0.08158537424016632 50 | "StochOptForest 51 | (apx-risk)",5,yes,0.08109586734786722 52 | "StochOptForest 53 | (apx-risk)",6,yes,0.0812621541350357 54 | "StochOptForest 55 | (apx-risk)",7,yes,0.080637499326131 56 | "StochOptForest 57 | (apx-risk)",8,yes,0.081976571177338 58 | "StochOptForest 59 | (apx-risk)",9,yes,0.08150074343786871 60 | "StochOptForest 61 | (apx-risk)",10,yes,0.08156678088634976 62 | "StochOptForest 63 | (apx-risk)",1,no,0.1897439696972534 64 | "StochOptForest 65 | (apx-risk)",2,no,0.1532407216354238 66 | "StochOptForest 67 | (apx-risk)",3,no,0.08194619638549147 68 | "StochOptForest 69 | (apx-risk)",4,no,0.08175749778988094 70 | "StochOptForest 71 | (apx-risk)",5,no,0.08202335664734912 72 | "StochOptForest 73 | (apx-risk)",6,no,0.08130082226773405 74 | "StochOptForest 75 | (apx-risk)",7,no,0.08358314872681988 76 | "StochOptForest 77 | (apx-risk)",8,no,0.0838279791780592 78 | "StochOptForest 79 | (apx-risk)",9,no,0.08178137683002068 80 | "StochOptForest 81 | (apx-risk)",10,no,0.08079493084196382 82 | RandSplitForest,1,no,0.10007878994101344 83 | RandSplitForest,2,no,0.09993283154643436 84 | RandSplitForest,3,no,0.0995578177638855 85 | RandSplitForest,4,no,0.099759249508467 86 | RandSplitForest,5,no,0.10011426313697927 87 | RandSplitForest,6,no,0.10000784272296277 88 | RandSplitForest,7,no,0.10000639626221734 89 | RandSplitForest,8,no,0.10033220542440828 90 | RandSplitForest,9,no,0.1006287905318978 91 | RandSplitForest,10,no,0.09958181316172203 92 | RandForest,1,no,0.157871130657607 93 | RandForest,2,no,0.11898651261160856 94 | RandForest,3,no,0.09095347117169719 95 | RandForest,4,no,0.09130607015025945 96 | RandForest,5,no,0.09076956387224343 97 | 
RandForest,6,no,0.09020070583534874 98 | RandForest,7,no,0.08995832750909 99 | RandForest,8,no,0.08932693864026157 100 | RandForest,9,no,0.09062845770859658 101 | RandForest,10,no,0.08999882184328556 102 | -------------------------------------------------------------------------------- /mean_var/feature_freq_full_oracle.csv: -------------------------------------------------------------------------------- 1 | Method,p,Constraint,freq 2 | "StochOptForest 3 | (apx-soln)",1,yes,0.15319120310664225 4 | "StochOptForest 5 | (apx-soln)",2,yes,0.2020647328881861 6 | "StochOptForest 7 | (apx-soln)",3,yes,0.08145269861746848 8 | "StochOptForest 9 | (apx-soln)",4,yes,0.078265129255282 10 | "StochOptForest 11 | (apx-soln)",5,yes,0.08066344896998233 12 | "StochOptForest 13 | (apx-soln)",6,yes,0.07864831251206615 14 | "StochOptForest 15 | (apx-soln)",7,yes,0.08014277825079487 16 | "StochOptForest 17 | (apx-soln)",8,yes,0.08440642267985637 18 | "StochOptForest 19 | (apx-soln)",9,yes,0.08139888923969224 20 | "StochOptForest 21 | (apx-soln)",10,yes,0.07976638448002887 22 | "StochOptForest 23 | (apx-soln)",1,no,0.05486355210351915 24 | "StochOptForest 25 | (apx-soln)",2,no,0.06996413366942095 26 | "StochOptForest 27 | (apx-soln)",3,no,0.10798747836038808 28 | "StochOptForest 29 | (apx-soln)",4,no,0.10864634497972415 30 | "StochOptForest 31 | (apx-soln)",5,no,0.10632620188541463 32 | "StochOptForest 33 | (apx-soln)",6,no,0.10978046071232071 34 | "StochOptForest 35 | (apx-soln)",7,no,0.11223233654964339 36 | "StochOptForest 37 | (apx-soln)",8,no,0.11207033816407605 38 | "StochOptForest 39 | (apx-soln)",9,no,0.10985992804260061 40 | "StochOptForest 41 | (apx-soln)",10,no,0.10826922553289144 42 | "StochOptForest 43 | (apx-risk)",1,yes,0.2058995671574812 44 | "StochOptForest 45 | (apx-risk)",2,yes,0.197936533286545 46 | "StochOptForest 47 | (apx-risk)",3,yes,0.0752933348506378 48 | "StochOptForest 49 | (apx-risk)",4,yes,0.07284782288932817 50 | "StochOptForest 51 | 
(apx-risk)",5,yes,0.07187309275439865 52 | "StochOptForest 53 | (apx-risk)",6,yes,0.07328084065418769 54 | "StochOptForest 55 | (apx-risk)",7,yes,0.07447016205900399 56 | "StochOptForest 57 | (apx-risk)",8,yes,0.07809475690219257 58 | "StochOptForest 59 | (apx-risk)",9,yes,0.07535119608156696 60 | "StochOptForest 61 | (apx-risk)",10,yes,0.07495269336465722 62 | "StochOptForest 63 | (apx-risk)",1,no,0.2075243473037956 64 | "StochOptForest 65 | (apx-risk)",2,no,0.19615532492468699 66 | "StochOptForest 67 | (apx-risk)",3,no,0.07412530046120025 68 | "StochOptForest 69 | (apx-risk)",4,no,0.07371420368817033 70 | "StochOptForest 71 | (apx-risk)",5,no,0.0742039477897662 72 | "StochOptForest 73 | (apx-risk)",6,no,0.07207938512700457 74 | "StochOptForest 75 | (apx-risk)",7,no,0.07271225545740612 76 | "StochOptForest 77 | (apx-risk)",8,no,0.07529929583598034 78 | "StochOptForest 79 | (apx-risk)",9,no,0.07791429590143999 80 | "StochOptForest 81 | (apx-risk)",10,no,0.07627164351054926 82 | RandForest,1,no,0.17891432175316468 83 | RandForest,2,no,0.11382094760274258 84 | RandForest,3,no,0.0922329992856153 85 | RandForest,4,no,0.08972970966464924 86 | RandForest,5,no,0.08882683833549765 87 | RandForest,6,no,0.08700766296514435 88 | RandForest,7,no,0.09151800483364825 89 | RandForest,8,no,0.08505759931072354 90 | RandForest,9,no,0.0824061798282438 91 | RandForest,10,no,0.09048573642056955 92 | "StochOptForest 93 | (oracle)",1,yes,0.1773497288627402 94 | "StochOptForest 95 | (oracle)",2,yes,0.20052965217254892 96 | "StochOptForest 97 | (oracle)",3,yes,0.07737596074925764 98 | "StochOptForest 99 | (oracle)",4,yes,0.07798729167663589 100 | "StochOptForest 101 | (oracle)",5,yes,0.07681995538627781 102 | "StochOptForest 103 | (oracle)",6,yes,0.07315713461641354 104 | "StochOptForest 105 | (oracle)",7,yes,0.07535445588130629 106 | "StochOptForest 107 | (oracle)",8,yes,0.0827537237153022 108 | "StochOptForest 109 | (oracle)",9,yes,0.07844437414222388 110 | "StochOptForest 111 | 
(oracle)",10,yes,0.08022772279729307 112 | -------------------------------------------------------------------------------- /mean_var/feature_split_meanvar_normal_stoch.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/feature_split_meanvar_normal_stoch.pkl -------------------------------------------------------------------------------- /mean_var/feature_split_meanvar_normal_stoch_oracle.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/feature_split_meanvar_normal_stoch_oracle.pkl -------------------------------------------------------------------------------- /mean_var/feature_split_var_normal_oracle.csv: -------------------------------------------------------------------------------- 1 | Method,p,Constraint,freq 2 | "StochOptForest 3 | (apx-soln)",1,yes,0.22536024185925851 4 | "StochOptForest 5 | (apx-soln)",2,yes,0.20201784886537968 6 | "StochOptForest 7 | (apx-soln)",3,yes,0.07209899226196452 8 | "StochOptForest 9 | (apx-soln)",4,yes,0.0700834420101351 10 | "StochOptForest 11 | (apx-soln)",5,yes,0.07037159560611844 12 | "StochOptForest 13 | (apx-soln)",6,yes,0.06855424342805233 14 | "StochOptForest 15 | (apx-soln)",7,yes,0.07250858323937605 16 | "StochOptForest 17 | (apx-soln)",8,yes,0.07437847979651073 18 | "StochOptForest 19 | (apx-soln)",9,yes,0.0723020842941162 20 | "StochOptForest 21 | (apx-soln)",10,yes,0.07232448863908808 22 | "StochOptForest 23 | (apx-soln)",1,no,0.05350153996416246 24 | "StochOptForest 25 | (apx-soln)",2,no,0.07195800477255869 26 | "StochOptForest 27 | (apx-soln)",3,no,0.11135997668320562 28 | "StochOptForest 29 | (apx-soln)",4,no,0.10754462808537676 30 | "StochOptForest 31 | (apx-soln)",5,no,0.11184710975139862 32 | "StochOptForest 33 | 
(apx-soln)",6,no,0.10939557945253747 34 | "StochOptForest 35 | (apx-soln)",7,no,0.10590436233295143 36 | "StochOptForest 37 | (apx-soln)",8,no,0.10733836450674175 38 | "StochOptForest 39 | (apx-soln)",9,no,0.11116622297398436 40 | "StochOptForest 41 | (apx-soln)",10,no,0.10998421147708269 42 | "StochOptForest 43 | (apx-risk)",1,yes,0.23589676174554317 44 | "StochOptForest 45 | (apx-risk)",2,yes,0.1946405468987452 46 | "StochOptForest 47 | (apx-risk)",3,yes,0.07057744917723668 48 | "StochOptForest 49 | (apx-risk)",4,yes,0.07015628624836612 50 | "StochOptForest 51 | (apx-risk)",5,yes,0.06743476569808689 52 | "StochOptForest 53 | (apx-risk)",6,yes,0.06947465556697989 54 | "StochOptForest 55 | (apx-risk)",7,yes,0.07107206636439725 56 | "StochOptForest 57 | (apx-risk)",8,yes,0.07351355728333185 58 | "StochOptForest 59 | (apx-risk)",9,yes,0.07457710644514315 60 | "StochOptForest 61 | (apx-risk)",10,yes,0.07265680457216935 62 | "StochOptForest 63 | (apx-risk)",1,no,0.21671365466600065 64 | "StochOptForest 65 | (apx-risk)",2,no,0.19311992177468507 66 | "StochOptForest 67 | (apx-risk)",3,no,0.0727519065292705 68 | "StochOptForest 69 | (apx-risk)",4,no,0.07355336260275457 70 | "StochOptForest 71 | (apx-risk)",5,no,0.07321503298716368 72 | "StochOptForest 73 | (apx-risk)",6,no,0.0712383507787897 74 | "StochOptForest 75 | (apx-risk)",7,no,0.07390279304936768 76 | "StochOptForest 77 | (apx-risk)",8,no,0.07510430587636739 78 | "StochOptForest 79 | (apx-risk)",9,no,0.07390739799083601 80 | "StochOptForest 81 | (apx-risk)",10,no,0.07649327374476417 82 | RandForest,1,no,0.18178319874245172 83 | RandForest,2,no,0.11285841958225491 84 | RandForest,3,no,0.08639459473870344 85 | RandForest,4,no,0.08861720775285367 86 | RandForest,5,no,0.09081798928940357 87 | RandForest,6,no,0.08920016527555992 88 | RandForest,7,no,0.09023476937241287 89 | RandForest,8,no,0.08732244537051535 90 | RandForest,9,no,0.08727206138246975 91 | RandForest,10,no,0.08549914849337391 92 | "StochOptForest 93 | 
(oracle)",1,yes,0.23819149961863326 94 | "StochOptForest 95 | (oracle)",2,yes,0.19632960925523615 96 | "StochOptForest 97 | (oracle)",3,yes,0.07036591507442246 98 | "StochOptForest 99 | (oracle)",4,yes,0.07089852273033183 100 | "StochOptForest 101 | (oracle)",5,yes,0.07088376081305103 102 | "StochOptForest 103 | (oracle)",6,yes,0.0681084017593458 104 | "StochOptForest 105 | (oracle)",7,yes,0.06985237578330247 106 | "StochOptForest 107 | (oracle)",8,yes,0.07189932551812939 108 | "StochOptForest 109 | (oracle)",9,yes,0.07224581386237132 110 | "StochOptForest 111 | (oracle)",10,yes,0.07122477558517593 112 | -------------------------------------------------------------------------------- /mean_var/feature_split_var_normal_oracle.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/feature_split_var_normal_oracle.pkl -------------------------------------------------------------------------------- /mean_var/mean_violation_meanvar_normal_stoch.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/mean_violation_meanvar_normal_stoch.pkl -------------------------------------------------------------------------------- /mean_var/rel_risk_meanvar_normal_stoch.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/rel_risk_meanvar_normal_stoch.pkl -------------------------------------------------------------------------------- /mean_var/rel_risk_meanvar_normal_stoch_R.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/rel_risk_meanvar_normal_stoch_R.pkl -------------------------------------------------------------------------------- /mean_var/rel_risk_meanvar_normal_stoch_oracle.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/rel_risk_meanvar_normal_stoch_oracle.pkl -------------------------------------------------------------------------------- /mean_var/risk_meanvar_normal_stoch.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/risk_meanvar_normal_stoch.pkl -------------------------------------------------------------------------------- /mean_var/risk_meanvar_normal_stoch_oracle.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/risk_meanvar_normal_stoch_oracle.pkl -------------------------------------------------------------------------------- /mean_var/risk_var_normal.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/risk_var_normal.pkl -------------------------------------------------------------------------------- /mean_var/risk_var_normal_oracle.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/risk_var_normal_oracle.pkl -------------------------------------------------------------------------------- /mean_var/speed_meanvar.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%run tree.py\n", 10 | "%run meanvar_tree_utilities.py\n", 11 | "%run regression_tree.py" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "N_list = [100, 200, 400]; p = 10\n", 21 | "R = 0.1; alpha = 0.1; obj_coef = 0\n", 22 | "lb = 0; ub = 1; sum_bound = 1; if_stoch_constr = False\n", 23 | "runs = 10\n", 24 | "\n", 25 | "generate_Y = generate_Y_lognormal\n", 26 | "cond_mean = [lambda x: np.exp(x[:, 0])/5, lambda x: x[:, 0]/5, lambda x: np.abs(x[:, 0])/5]\n", 27 | "cond_std = [lambda x: 1 - 0.5*((-3<=x[:, 1]) & (x[:, 1]<=-1)), lambda x: 1 - 0.5*((-1<=x[:, 1])&(x[:, 1]<=1)), lambda x: 1 - 0.5*((1<=x[:, 1])&(x[:, 1]<=3))]\n", 28 | "\n", 29 | "opt_solver = partial(solve_mean_variance, alpha = alpha, R = R, obj_coef = obj_coef, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr)\n", 30 | "hessian_computer = partial(compute_hessian, alpha = alpha)\n", 31 | "active_constraint = partial(search_active_constraint, R = R, lb = lb, ub = ub, sum_bound = sum_bound, if_stoch_constr = if_stoch_constr)\n", 32 | "gradient_computer = partial(compute_gradient, alpha = alpha, R = R, obj_coef = obj_coef)\n", 33 | "update_step = partial(compute_update_step, R = R)\n", 34 | "\n", 35 | "time_list = {str(N): {key: np.zeros(runs) for key in [\"rf_approx_risk\", \"rf_approx_sol\", \"rf_oracle\"]} for N in N_list}" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "for N in N_list:\n", 45 | " print(\"N:\", N)\n", 46 | " n_proposals = N; \n", 47 | " mtry = p;\n", 48 | " subsample_ratio = 1;\n", 49 | " max_depth=100; \n", 50 | " min_leaf_size=10; \n", 51 | " balancedness_tol 
= 0.2; \n", 52 | " honesty = False;\n", 53 | " verbose = False; oracle = True;\n", 54 | " bootstrap = True; \n", 55 | " \n", 56 | " X_list = [np.random.normal(size = (N, p)) for run in range(runs)]\n", 57 | " Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)]\n", 58 | "\n", 59 | " for run in range(runs):\n", 60 | " print(\"run:\", run)\n", 61 | " Y = Y_list[run]; Y_est = Y_list[run]\n", 62 | " X = X_list[run]; X_est = X_list[run]; \n", 63 | " \n", 64 | " time1 = time.time()\n", 65 | " rf_approx_risk = build_tree(Y, X, Y_est, X_est, \n", 66 | " opt_solver = opt_solver, hessian_computer = hessian_computer,\n", 67 | " gradient_computer = gradient_computer, \n", 68 | " search_active_constraint = active_constraint,\n", 69 | " compute_update_step = update_step,\n", 70 | " crit_computer = compute_crit_approx_risk, \n", 71 | " honesty = honesty, mtry = mtry,\n", 72 | " min_leaf_size = min_leaf_size, max_depth = max_depth, \n", 73 | " n_proposals = n_proposals, balancedness_tol = balancedness_tol,\n", 74 | " verbose = verbose)\n", 75 | " time2 = time.time()\n", 76 | " time_list[str(N)][\"rf_approx_risk\"][run] = time2 - time1\n", 77 | "\n", 78 | " time1 = time.time()\n", 79 | " rf_approx_sol = build_tree(Y, X, Y_est, X_est, \n", 80 | " opt_solver = opt_solver, hessian_computer = hessian_computer,\n", 81 | " gradient_computer = gradient_computer, \n", 82 | " search_active_constraint = active_constraint,\n", 83 | " compute_update_step = update_step,\n", 84 | " crit_computer = partial(compute_crit_approx_sol, obj_coef = obj_coef, alpha = alpha), \n", 85 | " honesty = honesty, mtry = mtry,\n", 86 | " min_leaf_size = min_leaf_size, max_depth = max_depth, \n", 87 | " n_proposals = n_proposals, balancedness_tol = balancedness_tol,\n", 88 | " verbose = verbose)\n", 89 | " time2 = time.time()\n", 90 | " time_list[str(N)][\"rf_approx_sol\"][run] = time2 - time1\n", 91 | "\n", 92 | "\n", 93 | " time1 = time.time()\n", 94 | " rf_oracle = build_tree(Y, X, 
Y_est, X_est, \n", 95 | " opt_solver = opt_solver, hessian_computer = hessian_computer,\n", 96 | " gradient_computer = gradient_computer, \n", 97 | " search_active_constraint = active_constraint,\n", 98 | " compute_update_step = update_step,\n", 99 | " crit_computer = partial(compute_crit_oracle, solver = opt_solver), \n", 100 | " honesty = honesty, mtry = mtry,\n", 101 | " min_leaf_size = min_leaf_size, max_depth = max_depth, \n", 102 | " n_proposals = n_proposals, balancedness_tol = balancedness_tol,\n", 103 | " verbose = verbose)\n", 104 | " time2 = time.time()\n", 105 | " time_list[str(N)][\"rf_oracle\"][run] = time2 - time1\n", 106 | " " 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "pickle.dump(time_list, open(\"time_meanvar.pkl\", \"wb\"))" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [] 124 | } 125 | ], 126 | "metadata": { 127 | "kernelspec": { 128 | "display_name": "Python 3", 129 | "language": "python", 130 | "name": "python3" 131 | }, 132 | "language_info": { 133 | "codemirror_mode": { 134 | "name": "ipython", 135 | "version": 3 136 | }, 137 | "file_extension": ".py", 138 | "mimetype": "text/x-python", 139 | "name": "python", 140 | "nbconvert_exporter": "python", 141 | "pygments_lexer": "ipython3", 142 | "version": "3.8.8" 143 | } 144 | }, 145 | "nbformat": 4, 146 | "nbformat_minor": 4 147 | } 148 | -------------------------------------------------------------------------------- /mean_var/time_meanvar.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/mean_var/time_meanvar.pkl -------------------------------------------------------------------------------- /newsvendor/.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/newsvendor/.DS_Store -------------------------------------------------------------------------------- /newsvendor/Plotting_newsvendor.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Plotting_newsvendor" 3 | author: "Xiaojie Mao" 4 | date: "9/19/2020" 5 | output: html_document 6 | --- 7 | 8 | ```{r} 9 | library(tidyverse) 10 | library(latex2exp) 11 | ``` 12 | 13 | ```{r} 14 | # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/ 15 | colors = list("#D55E00", "#E69F00", "#56B4E9", "#009E73", "#CC79A7", "#0072B2") 16 | names(colors) = c("StochOptForest\n(oracle)", "StochOptForest\n(apx-risk)", "StochOptForest\n(apx-soln)", "RandForest", "RandSplitForest", "GenRandForest") 17 | ``` 18 | 19 | # Fig 5(a) 20 | ```{r} 21 | data = (read.csv("risk_nv_n.csv")) 22 | data = as_tibble(data) 23 | data$n = as.factor(data$n) 24 | data$Method = factor(data$Method, levels =c("StochOptForest\n(oracle)", "StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "GenRandForest", "RandForest")) 25 | ``` 26 | 27 | ```{r} 28 | plot_risk_n = data %>% 29 | ggplot(aes(x = n, y = rel_risk, fill = Method)) + 30 | geom_boxplot() + 31 | scale_fill_manual(values = 32 | c("StochOptForest\n(oracle)" = colors[["StochOptForest\n(oracle)"]], 33 | "StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 34 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 35 | "GenRandForest" = colors[["GenRandForest"]], 36 | "RandForest" = colors[["RandForest"]])) + ylab("Relative risk") + xlab(TeX("Sample size $n$")) + theme(legend.key.height = unit(0.8, "cm")) 37 | ``` 38 | 39 | ```{r} 40 | plot_risk_n 41 | ``` 42 | ```{r} 43 | ggsave("../../paper/fig/nv_risk_n.pdf", plot = plot_risk_n,width = 6.5, height = 4.5) 44 | ``` 45 | 46 | ```{r} 47 | 
ggsave("nv_risk_n.pdf", plot = plot_risk_n,width = 10, height = 4.5) 48 | ``` 49 | 50 | # Fig 5(b) 51 | ```{r} 52 | data = (read.csv("feature_split_nv_n.csv")) 53 | data = as_tibble(data) 54 | data$p = factor(data$p) 55 | data$Method = factor(data$Method, levels =c("StochOptForest\n(oracle)", "StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "GenRandForest", "RandForest")) 56 | ``` 57 | 58 | ```{r} 59 | plot_feature_freq = data %>% ggplot(aes(x = p, y = freq, fill = Method)) + geom_col(position='dodge', color = "black") + xlab("Covariate index") + ylab("Splitting frequency") + coord_flip() + 60 | scale_fill_manual(values = 61 | c("StochOptForest\n(oracle)" = colors[["StochOptForest\n(oracle)"]], 62 | "StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 63 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 64 | "GenRandForest" = colors[["GenRandForest"]], 65 | "RandForest" = colors[["RandForest"]])) + theme(legend.position = "none") 66 | ``` 67 | 68 | ```{r} 69 | plot_feature_freq 70 | ``` 71 | ```{r} 72 | ggsave("../../paper/fig/nv_split_freq.pdf", plot = plot_feature_freq, height = 5.5, width = 2) 73 | ``` 74 | 75 | # Fig 5(c) 76 | ```{r} 77 | data <- read_csv("feature_importance_n.csv")[, -1] 78 | data$p = 1:10 79 | data$p = factor(data$p) 80 | colnames(data) = c("StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest", "StochOptForest\n(oracle)", "p") 81 | data = data %>% pivot_longer(colnames(data)[1:4], names_to = "Method", values_to = "FI") 82 | data$Method = factor(data$Method, levels =c("StochOptForest\n(oracle)", "StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandForest")) 83 | ``` 84 | 85 | ```{r} 86 | plot_feature_imp = data %>% 87 | ggplot(aes(x = p, y = FI, fill = Method)) + 88 | geom_col(position='dodge', color = "black") + 89 | xlab("Covariate index") + ylab("Normalized Importance") + 90 | scale_fill_manual(values = 91 | c("StochOptForest\n(oracle)" = 
colors[["StochOptForest\n(oracle)"]], 92 | "StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 93 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 94 | "RandForest" = colors[["RandForest"]])) + 95 | theme(legend.position = "none") 96 | ``` 97 | 98 | ```{r} 99 | ggsave("../../paper/fig/nv_feature_importance.pdf", plot = plot_feature_imp, height = 2, width = 10) 100 | ``` 101 | 102 | # Fig 6(a) 103 | ```{r} 104 | data = (read.csv("risk_nv_p.csv")) 105 | data = as_tibble(data) 106 | data$p = factor(data$p) 107 | data$Method = factor(data$Method, levels =c("StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)", "RandSplitForest", "5-NN", "10-NN","50-NN")) 108 | ``` 109 | 110 | ```{r} 111 | plot_dimension = ggplot(data) + 112 | geom_boxplot(aes(x=Method, y=rel_risk, fill = p)) + 113 | ylab("Relative risk") + xlab("Method") 114 | ``` 115 | 116 | ```{r} 117 | plot_dimension 118 | ``` 119 | 120 | ```{r} 121 | ggsave("../../paper/fig/nv_risk_d.pdf", plot = plot_dimension, height = 2.5, width = 10) 122 | ``` 123 | 124 | # Fig 6(b) 125 | ```{r} 126 | data = read.csv("risk_nv_highdim.csv") 127 | data = as_tibble(data) 128 | data$p = factor(data$p) 129 | data$Method = factor(data$Method, 130 | levels =c("StochOptForest\n(oracle)", 131 | "StochOptForest\n(apx-soln)", 132 | "StochOptForest\n(apx-risk)", 133 | "RandSplitForest", "5-NN", "10-NN", 134 | "50-NN")) 135 | ``` 136 | 137 | ```{r} 138 | plot_highdimension = data %>% filter((Method != "StochOptForest\n(oracle)")) %>% 139 | ggplot() + 140 | geom_boxplot(aes(x=Method, y=rel_risk, fill = p)) + 141 | ylab("Relative risk") + xlab("Method") 142 | ``` 143 | 144 | ```{r} 145 | plot_highdimension 146 | ``` 147 | 148 | ```{r} 149 | ggsave("../../paper/fig/nv_risk_high_dim.pdf", 150 | plot = plot_highdimension, height = 2.5, width = 10) 151 | ``` 152 | 153 | # Fig 15(b) 154 | ```{r} 155 | data = (read.csv("risk_nv_honesty.csv")) 156 | data = as_tibble(data) 157 | data$n = 
as.factor(data$n) 158 | data$Method = factor(data$Method, levels =c("StochOptForest\n(apx-soln)", "StochOptForest\n(apx-risk)")) 159 | data$Honesty = factor(data$Honesty, levels = c("yes", "no")) 160 | ``` 161 | 162 | ```{r} 163 | plot_rel_risk_nv_honesty = data %>% 164 | ggplot(aes(x = n, y = rel_risk, fill = Method, linetype = Honesty)) + 165 | geom_boxplot() + 166 | scale_fill_manual(values = 167 | c("StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 168 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]])) + ylab("Relative risk") + xlab(TeX("Sample size $n$")) + ylim(1, 3) + theme(legend.key.height = unit(0.8, "cm")) 169 | ``` 170 | 171 | ```{r} 172 | plot_rel_risk_nv_honesty 173 | ``` 174 | ```{r} 175 | ggsave("../../paper/fig/nv_risk_honesty.pdf", plot = plot_rel_risk_nv_honesty, height = 4.5, width = 5.5) 176 | ``` 177 | -------------------------------------------------------------------------------- /newsvendor/experiment_nv_highdim.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from nv_tree_utilities import * 3 | 4 | import mkl 5 | mkl.set_num_threads(1) 6 | 7 | 8 | seed = 0 9 | np.random.seed(seed) 10 | 11 | p_list = [10, 100, 200, 250] 12 | runs = 50 13 | n_jobs = 50 14 | n_trees = 500; 15 | N_list = [200] 16 | Nx_test = 200 17 | Ny_test = 2000 18 | Ny_train = 1000 19 | 20 | 21 | b_list = np.array([100., 1.]) 22 | h_list = np.array([5., 0.05]) 23 | C = 1000 24 | L = len(h_list) 25 | 26 | honesty = False; 27 | verbose = False; oracle = True; 28 | bootstrap = True; 29 | 30 | cond_mean = [lambda x: 3, lambda x: 3] 31 | cond_std = [lambda x: np.exp(x[:, 0]), lambda x: np.exp(x[:, 1])] 32 | 33 | risk_all = {} 34 | feature_split_all = {} 35 | results_eval_all = {} 36 | fi_all = {} 37 | 38 | direct = '' 39 | date = '' 40 | output = direct + date + "nv_highdim.txt" 41 | 42 | with open(output, 'w') as f: 43 | print("start", file = f) 44 | 45 | for N in N_list: 
46 | risk_all[str(N)] = {} 47 | feature_split_all[str(N)] = {} 48 | results_eval_all[str(N)] = {} 49 | fi_all[str(N)] = {} 50 | 51 | for p in p_list: 52 | with open(output, 'a') as f: 53 | print("N: ", N, file = f) 54 | print("p: ", p, file = f) 55 | 56 | n_proposals = N; 57 | mtry = p; 58 | subsample_ratio = 1; 59 | max_depth=100; 60 | min_leaf_size=10; 61 | balancedness_tol = 0.2; 62 | 63 | np.random.seed(seed) 64 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 65 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std, seed = seed) for run in range(runs)] 66 | 67 | 68 | time1 = time.time() 69 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_adaptive_nonadaptive_one_run)(X_list[run], Y_list[run], X_list[run], Y_list[run], 70 | h_list = h_list, b_list = b_list, C = C, 71 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, 72 | oracle = oracle, min_leaf_size = min_leaf_size, verbose = verbose, max_depth = max_depth, 73 | n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap, seed = seed) for run in range(runs)) 74 | time2 = time.time() 75 | with open(output, 'a') as f: 76 | print("time: ", time2 - time1, file = f) 77 | print("------------------------", file = f) 78 | 79 | time1 = time.time() 80 | results_eval = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 81 | Nx_test, Ny_train, Ny_test, cond_mean, cond_std, 82 | h_list =h_list, b_list = b_list, C = C, verbose = verbose, seed = seed) for run in range(runs)) 83 | time2 = time.time() 84 | results_eval_all[str(N)][str(p)] = results_eval 85 | with open(output, 'a') as f: 86 | print("time: ", time2 - time1, file = f) 87 | print("------------------------", file = f) 88 | 89 | risks = extract_risk(results_eval) 90 | with open(output, 'a') as f: 91 | print("risk with C", C, file=f) 92 | for k,v in sorted(risks.items(), key = lambda 
x: x[1].mean()): 93 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 94 | print("------------------------", file = f) 95 | risk_all[str(N)][str(p)] = risks 96 | 97 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 98 | feature_split_all[str(N)][str(p)] = feature_split_freq 99 | 100 | fi = evaluate_feature_importance(results_fit, p) 101 | fi_all[str(N)][str(p)] = fi 102 | 103 | pickle.dump(risk_all, open(direct + date + "risk_highdim.pkl", "wb")) 104 | pickle.dump(feature_split_all, open(direct + date + "feature_split_highdim.pkl", "wb")) 105 | pickle.dump(fi_all, open(direct + date + "feature_importance_highdim.pkl", "wb")) 106 | pickle.dump(results_eval_all, open(direct + date + "results_eval_highdim.pkl", "wb")) -------------------------------------------------------------------------------- /newsvendor/experiment_nv_honesty.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from nv_tree_utilities import * 3 | 4 | import mkl 5 | mkl.set_num_threads(1) 6 | 7 | # forest 8 | 9 | p_list = [10] 10 | runs = 50 11 | n_jobs = 50 12 | n_trees = 500; 13 | N_list = [100, 200, 400, 800] 14 | Nx_test = 200 15 | Ny_test = 2000 16 | Ny_train = 1000 17 | 18 | 19 | 20 | b_list = np.array([100., 1.]) 21 | h_list = np.array([5., 0.05]) 22 | C = 1000 23 | L = len(h_list) 24 | 25 | honesty = False; 26 | verbose = False; oracle = True; 27 | bootstrap = False; 28 | 29 | cond_mean = [lambda x: 3, lambda x: 3] 30 | cond_std = [lambda x: np.exp(x[:, 0]), lambda x: np.exp(x[:, 1])] 31 | 32 | risk_all = {} 33 | feature_split_all = {} 34 | results_eval_all = {} 35 | 36 | direct = '' 37 | date = '' 38 | output = "nv_honesty.txt" 39 | 40 | with open(output, 'w') as f: 41 | print("start", file = f) 42 | 43 | for N in N_list: 44 | risk_all[str(N)] = {} 45 | feature_split_all[str(N)] = {} 46 | results_eval_all[str(N)] = {} 47 | 48 | for p in p_list: 49 | with open(output, 'a') as f: 50 | 
print("N: ", N, file = f) 51 | print("p: ", p, file = f) 52 | 53 | n_proposals = N; 54 | mtry = p; 55 | subsample_ratio = 0.63; 56 | max_depth=100; 57 | min_leaf_size=10; 58 | balancedness_tol = 0.2; 59 | 60 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 61 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std) for run in range(runs)] 62 | 63 | time1 = time.time() 64 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest_one_run_honesty)(X_list[run], Y_list[run], X_list[run], Y_list[run], 65 | h_list = h_list, b_list = b_list, C = C, 66 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, oracle = oracle, min_leaf_size = min_leaf_size, verbose = verbose, max_depth = max_depth, n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap) for run in range(runs)) 67 | time2 = time.time() 68 | with open(output, 'a') as f: 69 | print("time: ", time2 - time1, file = f) 70 | print("------------------------", file = f) 71 | 72 | time1 = time.time() 73 | results_eval = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 74 | Nx_test, Ny_train, Ny_test, cond_mean, cond_std, 75 | h_list =h_list, b_list = b_list, C = C, verbose = verbose) for run in range(runs)) 76 | time2 = time.time() 77 | results_eval_all[str(N)][str(p)] = results_eval 78 | with open(output, 'a') as f: 79 | print("time: ", time2 - time1, file = f) 80 | print("------------------------", file = f) 81 | 82 | risks = extract_risk(results_eval) 83 | with open(output, 'a') as f: 84 | print("risk with C", C, file=f) 85 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 86 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 87 | print("------------------------", file = f) 88 | risk_all[str(N)][str(p)] = risks 89 | 90 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 91 | with 
open(output, 'a') as f: 92 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 93 | print(k,"frac feat. slt.:",v.mean(0), file = f) 94 | print("---", file = f) 95 | print("----------------------", file = f) 96 | print("----------------------", file = f) 97 | feature_split_all[str(N)][str(p)] = feature_split_freq 98 | 99 | pickle.dump(risk_all, open(direct + date + "risk_nv_honesty.pkl", "wb")) 100 | pickle.dump(feature_split_all, open(direct + date + "feature_split_nv_honesty.pkl", "wb")) 101 | pickle.dump(results_eval_all, open(direct + date + "results_eval_nv_honesty.pkl", "wb")) -------------------------------------------------------------------------------- /newsvendor/experiment_nv_n.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from nv_tree_utilities import * 3 | 4 | import mkl 5 | mkl.set_num_threads(1) 6 | 7 | 8 | seed = 0 9 | np.random.seed(seed) 10 | 11 | p_list = [10] 12 | runs = 50 13 | n_jobs = 50 14 | n_trees = 500; 15 | N_list = [100, 200, 400, 800] 16 | Nx_test = 200 17 | Ny_test = 2000 18 | Ny_train = 1000 19 | 20 | 21 | b_list = np.array([100., 1.]) 22 | h_list = np.array([5., 0.05]) 23 | C = 1000 24 | L = len(h_list) 25 | 26 | honesty = False; 27 | verbose = False; oracle = True; 28 | bootstrap = True; 29 | 30 | cond_mean = [lambda x: 3, lambda x: 3] 31 | cond_std = [lambda x: np.exp(x[:, 0]), lambda x: np.exp(x[:, 1])] 32 | 33 | risk_all = {} 34 | feature_split_all = {} 35 | results_eval_all = {} 36 | fi_all = {} 37 | 38 | direct = '' 39 | date = '' 40 | output = direct + date + "nv_n.txt" 41 | 42 | with open(output, 'w') as f: 43 | print("start", file = f) 44 | 45 | for N in N_list: 46 | risk_all[str(N)] = {} 47 | feature_split_all[str(N)] = {} 48 | results_eval_all[str(N)] = {} 49 | fi_all[str(N)] = {} 50 | 51 | for p in p_list: 52 | with open(output, 'a') as f: 53 | print("N: ", N, file = f) 54 | print("p: ", p, file = f) 55 | 56 | 
n_proposals = N; 57 | mtry = p; 58 | subsample_ratio = 1; 59 | max_depth=100; 60 | min_leaf_size=10; 61 | balancedness_tol = 0.2; 62 | 63 | np.random.seed(seed) 64 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 65 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std, seed = seed) for run in range(runs)] 66 | 67 | time1 = time.time() 68 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_forest_one_run)(X_list[run], Y_list[run], X_list[run], Y_list[run], 69 | h_list = h_list, b_list = b_list, C = C, 70 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, 71 | oracle = oracle, min_leaf_size = min_leaf_size, verbose = verbose, max_depth = max_depth, 72 | n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap, seed = seed) for run in range(runs)) 73 | time2 = time.time() 74 | with open(output, 'a') as f: 75 | print("time: ", time2 - time1, file = f) 76 | print("------------------------", file = f) 77 | 78 | 79 | time1 = time.time() 80 | results_eval = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 81 | Nx_test, Ny_train, Ny_test, cond_mean, cond_std, 82 | h_list =h_list, b_list = b_list, C = C, verbose = verbose, seed =seed) for run in range(runs)) 83 | time2 = time.time() 84 | results_eval_all[str(N)][str(p)] = results_eval 85 | with open(output, 'a') as f: 86 | print("time: ", time2 - time1, file = f) 87 | print("------------------------", file = f) 88 | 89 | risks = extract_risk(results_eval) 90 | with open(output, 'a') as f: 91 | print("risk with C", C, file=f) 92 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 93 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 94 | print("------------------------", file = f) 95 | risk_all[str(N)][str(p)] = risks 96 | 97 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 98 | with 
open(output, 'a') as f: 99 | for k,v in sorted(feature_split_freq.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 100 | print(k,"frac feat. slt.:",v.mean(0), file = f) 101 | print("---", file = f) 102 | print("----------------------", file = f) 103 | print("----------------------", file = f) 104 | feature_split_all[str(N)][str(p)] = feature_split_freq 105 | 106 | fi = evaluate_feature_importance(results_fit, p) 107 | with open(output, 'a') as f: 108 | for k,v in sorted(fi.items(), key = lambda x: x[1].mean(0)[0], reverse = True): 109 | print(k,"frac feat. slt.:",v.mean(0), file = f) 110 | print("---", file = f) 111 | print("----------------------", file = f) 112 | print("----------------------", file = f) 113 | fi_all[str(N)][str(p)] = fi 114 | 115 | pickle.dump(risk_all, open(direct + date + "risk_n.pkl", "wb")) 116 | pickle.dump(feature_split_all, open(direct + date + "feature_split_n.pkl", "wb")) 117 | pickle.dump(fi_all, open(direct + date + "feature_importance_n.pkl", "wb")) 118 | pickle.dump(results_eval_all, open(direct + date + "results_eval_n.pkl", "wb")) -------------------------------------------------------------------------------- /newsvendor/experiment_nv_p.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from nv_tree_utilities import * 3 | 4 | import mkl 5 | mkl.set_num_threads(1) 6 | 7 | seed = 0 8 | np.random.seed(seed) 9 | 10 | p_list = [5, 10, 20, 40, 80] 11 | runs = 50 12 | n_jobs = 50 13 | n_trees = 500; 14 | N_list = [800] 15 | Nx_test = 200 16 | Ny_test = 2000 17 | Ny_train = 1000 18 | 19 | 20 | b_list = np.array([100., 1.]) 21 | h_list = np.array([5., 0.05]) 22 | C = 1000 23 | L = len(h_list) 24 | 25 | honesty = False; 26 | verbose = False; oracle = False; 27 | bootstrap = True; 28 | 29 | cond_mean = [lambda x: 3, lambda x: 3] 30 | cond_std = [lambda x: np.exp(x[:, 0]), lambda x: np.exp(x[:, 1])] 31 | 32 | risk_all = {} 33 | feature_split_all = {} 34 | results_eval_all 
= {} 35 | 36 | direct = '' 37 | date = '' 38 | output = "nv_p.txt" 39 | 40 | with open(output, 'w') as f: 41 | print("start", file = f) 42 | 43 | for N in N_list: 44 | risk_all[str(N)] = {} 45 | feature_split_all[str(N)] = {} 46 | results_eval_all[str(N)] = {} 47 | 48 | for p in p_list: 49 | with open(output, 'a') as f: 50 | print("N: ", N, file = f) 51 | print("p: ", p, file = f) 52 | 53 | n_proposals = N; 54 | mtry = p; 55 | subsample_ratio = 1; 56 | max_depth=100; 57 | min_leaf_size=10; 58 | balancedness_tol = 0.2; 59 | 60 | np.random.seed(seed) 61 | X_list = [np.random.normal(size = (N, p)) for run in range(runs)] 62 | Y_list = [generate_Y(X_list[run], cond_mean, cond_std, seed = seed) for run in range(runs)] 63 | 64 | time1 = time.time() 65 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(compare_adaptive_nonadaptive_one_run)(X_list[run], Y_list[run], X_list[run], Y_list[run], 66 | h_list = h_list, b_list = b_list, C = C, 67 | n_trees = n_trees, honesty= honesty, mtry = mtry, subsample_ratio = subsample_ratio, 68 | oracle = oracle, min_leaf_size = min_leaf_size, verbose = verbose, max_depth = max_depth, 69 | n_proposals = n_proposals, balancedness_tol = balancedness_tol, bootstrap = bootstrap, seed = seed) for run in range(runs)) 70 | time2 = time.time() 71 | with open(output, 'a') as f: 72 | print("time: ", time2 - time1, file = f) 73 | print("------------------------", file = f) 74 | 75 | time1 = time.time() 76 | results_eval = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run)(results_fit[run], X_list[run], Y_list[run], X_list[run], Y_list[run], 77 | Nx_test, Ny_train, Ny_test, cond_mean, cond_std, 78 | h_list =h_list, b_list = b_list, C = C, verbose = verbose, seed = seed) for run in range(runs)) 79 | time2 = time.time() 80 | results_eval_all[str(N)][str(p)] = results_eval 81 | with open(output, 'a') as f: 82 | print("time: ", time2 - time1, file = f) 83 | print("------------------------", file = f) 84 | 85 | risks = 
extract_risk(results_eval) 86 | with open(output, 'a') as f: 87 | print("risk with C", C, file=f) 88 | for k,v in sorted(risks.items(), key = lambda x: x[1].mean()): 89 | print(k,"avg risk:",np.mean(v),"+-", 2*np.std(v)/np.sqrt(len(v)), file=f) 90 | print("------------------------", file = f) 91 | risk_all[str(N)][str(p)] = risks 92 | 93 | feature_split_freq = evaluate_feature_split_freq(results_fit, p) 94 | feature_split_all[str(N)][str(p)] = feature_split_freq 95 | 96 | fi = evaluate_feature_importance(results_fit, p) 97 | fi_all[str(N)][str(p)] = fi 98 | 99 | pickle.dump(risk_all, open(direct + date + "risk_p.pkl", "wb")) 100 | pickle.dump(feature_split_all, open(direct + date + "feature_split_p.pkl", "wb")) 101 | pickle.dump(fi_all, open(direct + date + "feature_importance_p.pkl", "wb")) 102 | pickle.dump(results_eval_all, open(direct + date + "results_eval_p.pkl", "wb")) -------------------------------------------------------------------------------- /newsvendor/feature_importance_n.csv: -------------------------------------------------------------------------------- 1 | ,rf_approx_sol,rf_approx_risk,rf_rf,rf_oracle 2 | 0,1.0,1.0,1.0,1.0 3 | 1,0.08632444262953876,0.02584373247198452,0.9882243294645368,0.0043971939125613405 4 | 2,0.08133486496495981,0.015944733824826043,0.0007951013731738756,0.0021133482202872764 5 | 3,0.08469853229460055,0.004493087823986835,0.006350785759740854,0.0015200753520682533 6 | 4,0.0,0.012806043434713412,0.000546128750667915,0.003026667778235436 7 | 5,0.07929192069475678,0.0,0.01072968730449335,0.0011688538096748 8 | 6,0.0953164104354058,0.017980985750953464,0.0,0.0005952855728902988 9 | 7,0.08846674276727116,0.006456512488516669,0.016024409534570724,0.0017717371415174338 10 | 8,0.08501270596707973,0.010716133382051513,0.0082562110968299,0.0 11 | 9,0.047158342420841125,0.010983384571570299,0.004042219283690514,0.0030484962870817216 12 | -------------------------------------------------------------------------------- 
/newsvendor/feature_importance_n.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/newsvendor/feature_importance_n.pkl -------------------------------------------------------------------------------- /newsvendor/feature_split_n.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/newsvendor/feature_split_n.pkl -------------------------------------------------------------------------------- /newsvendor/feature_split_nv_n.csv: -------------------------------------------------------------------------------- 1 | Method,p,freq 2 | "StochOptForest 3 | (apx-soln)",1,0.23663385918599145 4 | "StochOptForest 5 | (apx-soln)",2,0.1262997567085451 6 | "StochOptForest 7 | (apx-soln)",3,0.07791925602640702 8 | "StochOptForest 9 | (apx-soln)",4,0.07769282681743239 10 | "StochOptForest 11 | (apx-soln)",5,0.08113828902301615 12 | "StochOptForest 13 | (apx-soln)",6,0.0806953474209536 14 | "StochOptForest 15 | (apx-soln)",7,0.08102196280087386 16 | "StochOptForest 17 | (apx-soln)",8,0.07771413306626371 18 | "StochOptForest 19 | (apx-soln)",9,0.08097209934229173 20 | "StochOptForest 21 | (apx-soln)",10,0.07991246960822321 22 | "StochOptForest 23 | (apx-risk)",1,0.22374253472272354 24 | "StochOptForest 25 | (apx-risk)",2,0.10909307073102957 26 | "StochOptForest 27 | (apx-risk)",3,0.08166454071465018 28 | "StochOptForest 29 | (apx-risk)",4,0.08484455255102795 30 | "StochOptForest 31 | (apx-risk)",5,0.08381093431933583 32 | "StochOptForest 33 | (apx-risk)",6,0.08524660152935798 34 | "StochOptForest 35 | (apx-risk)",7,0.08158559994379509 36 | "StochOptForest 37 | (apx-risk)",8,0.08313891260306452 38 | "StochOptForest 39 | (apx-risk)",9,0.08278475837028335 40 | "StochOptForest 41 | (apx-risk)",10,0.0840884945147343 42 | 
RandForest,1,0.18977236392695807 43 | RandForest,2,0.18911362308098986 44 | RandForest,3,0.07589339713745286 45 | RandForest,4,0.07615980521204559 46 | RandForest,5,0.0767848762302172 47 | RandForest,6,0.07624711199664415 48 | RandForest,7,0.07959259896062247 49 | RandForest,8,0.08103123447364283 50 | RandForest,9,0.07794068822285624 51 | RandForest,10,0.07746430075856832 52 | GenRandForest,1,0.1953419323890857 53 | GenRandForest,2,0.19507679615193496 54 | GenRandForest,3,0.07476715220979922 55 | GenRandForest,4,0.07694163151331686 56 | GenRandForest,5,0.07807596317980833 57 | GenRandForest,6,0.07582893310150947 58 | GenRandForest,7,0.07414173390247965 59 | GenRandForest,8,0.07724279569753775 60 | GenRandForest,9,0.07618743700591918 61 | GenRandForest,10,0.07639562484861023 62 | "StochOptForest 63 | (oracle)",1,0.23507518808865682 64 | "StochOptForest 65 | (oracle)",2,0.1069486884719195 66 | "StochOptForest 67 | (oracle)",3,0.07969762495200153 68 | "StochOptForest 69 | (oracle)",4,0.0807801901800168 70 | "StochOptForest 71 | (oracle)",5,0.08287676098888702 72 | "StochOptForest 73 | (oracle)",6,0.08406969822085818 74 | "StochOptForest 75 | (oracle)",7,0.08439041734492453 76 | "StochOptForest 77 | (oracle)",8,0.08108021975130959 78 | "StochOptForest 79 | (oracle)",9,0.08324577188162768 80 | "StochOptForest 81 | (oracle)",10,0.08183544011979904 82 | -------------------------------------------------------------------------------- /newsvendor/feature_split_p.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/newsvendor/feature_split_p.pkl -------------------------------------------------------------------------------- /newsvendor/risk_highdim.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/newsvendor/risk_highdim.pkl -------------------------------------------------------------------------------- /newsvendor/risk_n.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/newsvendor/risk_n.pkl -------------------------------------------------------------------------------- /newsvendor/risk_nv_honesty.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/newsvendor/risk_nv_honesty.pkl -------------------------------------------------------------------------------- /newsvendor/risk_p.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/newsvendor/risk_p.pkl -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | This repo contains code for the paper [Stochastic Optimization Forests](https://arxiv.org/abs/2008.07473). 2 | 3 | 4 | # Code structure 5 | The tree and forest classes are in tree.py, and the splitting criterion implementations for newsvendor problem, CVaR optimization, mean variance optimization, shortest path optimization are in newsvendor/nv_tree_utilities.py, cvar/cvar_tree_utilities.py, mean_var/meanvar_tree_utilities.py, and uber/cvar_tree_utilities.py, respectively. All scripts for different experiments are experiment_*.py files in each directory. Calling 'python experiment_name.py' will run these experiments in python. 
6 | 7 | Part of the code for tree and forest classes builds on the [EconML](https://github.com/microsoft/EconML) package: 8 | - EconML: A Python Package for ML-Based Heterogeneous Treatment Effects Estimation. https://github.com/microsoft/EconML, 2019. Version 0.x. 9 | 10 | 11 | # Generating the figures and tables 12 | The basic process of generating the figures is to first run the corresponding experiment script in each directory to get experimental results stored in .pkl files, and then use prepare_plot_data.ipynb to transform the .pkl files into .csv files, and finally use the .Rmd file in each directory to generate the plots. 13 | 14 | ## CVaR Portfolio optimization 15 | ### Figure 2 16 | - Figure 2(a): cvar/experiment_cvar_lognormal.py --> cvar/risk_cvar_lognormal.pkl --> cvar/risk_lognormal.csv --> cvar/Plotting_cvar.Rmd 17 | - Figure 2(b): cvar/experiment_cvar_lognormal.py --> cvar/feature_imp_cvar_lognormal.pkl --> 18 | cvar/feature_imp_cvar_lognormal.csv --> cvar/Plotting_cvar.Rmd 19 | 20 | ### Figure 7 - 9 21 | - Figure 7: cvar/experiment_cvar_lognormal.py --> cvar/feature_split_cvar_lognormal.pkl --> cvar/feature_split_cvar_lognormal.csv --> cvar/Plotting_cvar.Rmd 22 | - Figure 8: cvar/experiment_cvar_lognormal_oracle.py --> cvar/risk_cvar_lognormal_oracle.pkl --> cvar/risk_lognormal_oracle.csv--> cvar/Plotting_cvar.Rmd 23 | - Figure 9: cvar/experiment_cvar_lognormal_objcoef.py --> cvar/risk_cvar_lognormal_objcoef.pkl --> cvar/risk_lognormal_objcoef.csv --> cvar/Plotting_cvar.Rmd 24 | 25 | ### Figure 10 26 | - Figure 10(a): cvar/experiment_cvar_normal.py --> cvar/risk_cvar_normal.pkl --> cvar/risk_normal.csv--> cvar/Plotting_cvar.Rmd 27 | - Figure 10(b): cvar/experiment_cvar_normal_oracle.py --> cvar/risk_cvar_normal_oracle.pkl --> cvar/risk_normal_oracle.csv--> cvar/Plotting_cvar.Rmd 28 | 29 | ## Uber experiment 30 | - All raw data files are in uber/data. 31 | - See uber/data_downloading.R and uber/preprocessing.R for data collection and preprocessing. 
32 | 33 | ### Figure 3 34 | uber/experiment_downtown_years.py --> uber/downtown_risks_forest_years_halfyear.pkl, uber/downtown_risks_forest_years_oneyear.pkl, uber/downtown_risks_forest_years_onehalfyear.pkl, uber/downtown_risks_forest_years_twoyear.pkl --> uber/downtown_risks_forest_years_halfyear.csv, uber/downtown_risks_forest_years_oneyear.csv, uber/downtown_risks_forest_years_onehalfyear.csv, uber/downtown_risks_forest_years_twoyear.csv --> 35 | Plotting_uber.Rmd 36 | 37 | 38 | ## Newsvendor 39 | ### Figure 5 40 | - Fig 5(a) newsvendor/experiment_nv_n.py --> newsvendor/risk_n.pkl --> newsvendor/risk_nv_n.csv --> newsvendor/Plotting_newsvendor.Rmd 41 | - Fig 5(b) newsvendor/experiment_nv_n.py --> newsvendor/feature_split_n.pkl --> newsvendor/feature_split_nv_n.csv --> newsvendor/Plotting_newsvendor.Rmd 42 | - Fig 5(c) newsvendor/experiment_nv_n.py --> newsvendor/feature_importance_n.pkl --> newsvendor/feature_importance_n.csv --> newsvendor/Plotting_newsvendor.Rmd 43 | 44 | ### Figure 6 45 | - Fig 6(a) newsvendor/experiment_nv_p.py --> newsvendor/risk_p.pkl --> newsvendor/risk_nv_p.csv --> newsvendor/Plotting_newsvendor.Rmd 46 | - Fig 6(b) newsvendor/experiment_nv_highdim.py --> newsvendor/risk_highdim.pkl --> newsvendor/risk_highdim.csv --> newsvendor/Plotting_newsvendor.Rmd 47 | 48 | ## mean-variance optimization 49 | ### Figure 4 50 | - Fig 4(a): mean_var/experiment_meanvar_stoch.py --> mean_var/rel_risk_meanvar_normal_stoch.pkl --> mean_var/rel_risk_full.csv --> mean_var/Plotting_meanvar.Rmd 51 | - Fig 4(b): mean_var/experiment_meanvar_stoch.py --> mean_var/feature_split_meanvar_normal_stoch.pkl --> mean_var/feature_freq_full.csv --> mean_var/Plotting_meanvar.Rmd 52 | - Fig 4(c): mean_var/experiment_meanvar_stoch.py --> mean_var/cond_violation_meanvar_normal_stoch.pkl --> mean_var/cond_violation_full.csv --> mean_var/Plotting_meanvar.Rmd 53 | - Fig 4(d): mean_var/experiment_meanvar_stoch.py --> mean_var/mean_violation_meanvar_normal_stoch.pkl --> 
mean_var/marginal_violation_full.csv --> mean_var/Plotting_meanvar.Rmd 54 | 55 | ### Figure 12 56 | - Fig 12(a): mean_var/experiment_var_normal_oracle.py --> mean_var/risk_var_normal_oracle.pkl --> mean_var/risk_var_normal_oracle.csv --> mean_var/Plotting_var.Rmd 57 | - Fig 12(b): mean_var/experiment_var_normal_oracle.py --> mean_var/feature_split_var_normal_oracle.pkl --> mean_var/feature_split_var_normal_oracle.csv --> mean_var/Plotting_var.Rmd 58 | - Fig 12(c): mean_var/experiment_var_normal.py --> mean_var/risk_var_normal.pkl --> mean_var/risk_var_normal.csv --> mean_var/Plotting_var.Rmd 59 | 60 | ### Figure 13 61 | - Fig 13(a): mean_var/experiment_meanvar_stoch_oracle.py --> mean_var/rel_risk_meanvar_normal_stoch_oracle.pkl --> mean_var/rel_risk_full_oracle.csv --> mean_var/Plotting_meanvar.Rmd 62 | - Fig 13(b): mean_var/experiment_meanvar_stoch.py --> mean_var/rel_risk_meanvar_normal_stoch_oracle.pkl --> mean_var/feature_freq_full_oracle.csv --> mean_var/Plotting_meanvar.Rmd 63 | 64 | ### Figure 14 65 | - Fig 14: mean_var/experiment_meanvar_stoch_R.py --> mean_var/rel_risk_meanvar_normal_stoch_R.pkl --> mean_var/rel_risk_full_R.csv --> mean_var/Plotting_meanvar.Rmd 66 | 67 | ## honest forests 68 | - Fig 15(a): cvar/experiment_cvar_lognormal_honesty.py --> cvar/risk_cvar_lognormal_honesty.pkl --> cvar/risk_lognormal_honesty.csv --> cvar/Plotting_cvar.Rmd 69 | - Fig 15(b): newsvendor/experiment_nv_honesty.py --> newsvendor/risk_nv_honesty.pkl --> newsvendor/risk_nv_honesty.csv --> newsvendor/Plotting_newsvendor.Rmd 70 | 71 | ## Running time 72 | - Table 1: cvar/speed_cvar.ipynb --> time_cvar.pkl 73 | - Table 2: mean_var/speed_meanvar.ipynb --> time_meanvar.pkl 74 | 75 | # Dependencies 76 | ## python 3.6.10 77 | - gurobipy 9.0.2 78 | - joblib 0.16.0 79 | - numpy 1.19.1 80 | - scikit-learn 0.23.2 81 | - scipy 1.3.1 82 | ## R 3.6.1 83 | - latex2exp 0.4.0 84 | - tidyverse 1.3.0 85 | 86 | 
-------------------------------------------------------------------------------- /uber/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/uber/.DS_Store -------------------------------------------------------------------------------- /uber/A_downtwon_1221to1256.csv: -------------------------------------------------------------------------------- 1 | V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54,V55,V56,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V75,V76,V77,V78,V79,V80,V81,V82,V83,V84,V85,V86,V87,V88,V89,V90,V91,V92,V93 2 | -1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 3 | 1,0,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 4 | 0,1,1,-1,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 5 | 0,0,0,1,0,0,0,-1,-1,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 6 | 0,0,0,0,1,0,0,1,0,0,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 7 | 
0,0,0,0,0,1,0,0,0,0,0,0,1,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 8 | 0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 9 | 0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 10 | 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 11 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 12 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,-1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 13 | 0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 14 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,-1,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 15 | 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 16 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 17 | 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,-1,-1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 18 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 19 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 20 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 21 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 22 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,-1,-1,-1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 23 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 24 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 25 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 26 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 27 | 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 28 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,-1,-1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 29 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 30 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 31 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 32 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 33 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 34 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 35 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 36 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0 37 | 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0 38 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,-1,-1,0,0,0,0,0,0,0,0,0 39 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0 40 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,-1,0,0,0,0,0,0,0 41 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,-1,0,0,0,0,0,0 42 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,-1,-1,-1,0,0,0 43 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,-1,-1,0 44 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-1 45 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0 46 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1 47 | 
-------------------------------------------------------------------------------- /uber/Plotting_uber.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Plotting_uber" 3 | output: html_document 4 | --- 5 | 6 | # Figure 3 7 | ```{r} 8 | colors = list("#D55E00", "#E69F00", "#56B4E9", "#009E73", "#CC79A7", "#0072B2") 9 | names(colors) = c("StochOptForest\n(oracle)", 10 | "StochOptForest\n(apx-risk)", 11 | "StochOptForest\n(apx-soln)", "RandForest", 12 | "RandSplitForest", "GenRandForest") 13 | compute_rel_riks = function(downtown_risks){ 14 | colnames(downtown_risks) = c("rf", "random", "grf", 15 | "apx-sol", "apx-risk", 16 | "apx-sol-uncstr", "apx-risk-uncstr", 17 | "vanilla", "oracle") 18 | risks =vector("list", ncol(downtown_risks)) 19 | for (i in 1:ncol(downtown_risks)){ 20 | risks[[i]] = (downtown_risks[, "vanilla"] - downtown_risks[, i])/(downtown_risks[, "vanilla"] - downtown_risks[, "oracle"]) 21 | colnames(risks[[i]]) = NULL 22 | } 23 | names(risks) = colnames(downtown_risks) 24 | risks = as_tibble(data.frame(risks)) 25 | colnames(risks) = colnames(downtown_risks) 26 | risks = risks %>% select(-c(vanilla, oracle)) 27 | risks %>% pivot_longer(colnames(risks), 28 | names_to = "methods", values_to = "rel_risk") %>% 29 | mutate(Constraint = case_when(methods == "apx-sol" ~ "yes", 30 | methods == "apx-risk"~ "yes", 31 | TRUE ~ "no")) %>% 32 | mutate(Method = case_when( 33 | methods == "rf" ~ "RandForest", 34 | methods == "random" ~ "RandSplitForest", 35 | methods == "apx-sol" ~ "StochOptForest\n(apx-soln)", 36 | methods == "apx-sol-uncstr" ~ "StochOptForest\n(apx-soln)", 37 | methods == "apx-risk" ~ "StochOptForest\n(apx-risk)", 38 | methods == "apx-risk-uncstr" ~ "StochOptForest\n(apx-risk)", 39 | methods == "grf" ~ "GenRandForest\n(modified)", 40 | )) %>% 41 | mutate(Constraint = factor(Constraint, levels = c("yes", "no"))) %>% 42 | mutate(Method = factor(Method, levels = 
c("StochOptForest\n(apx-risk)","StochOptForest\n(apx-soln)","RandForest", 43 | "GenRandForest\n(modified)","RandSplitForest"))) 44 | } 45 | ``` 46 | 47 | ```{r} 48 | downtown_risks =read_csv(paste0("downtown_risks", "_forest_years_twoyear.csv"))[, -1] 49 | risk2 = compute_rel_riks(downtown_risks) %>% mutate(Years = 2) 50 | downtown_risks =read_csv(paste0("downtown_risks", "_forest_years_onehalfyear.csv"))[, -1] 51 | risk1.5 = compute_rel_riks(downtown_risks) %>% mutate(Years = 1.5) 52 | downtown_risks =read_csv(paste0("downtown_risks", "_forest_years_oneyear.csv"))[, -1] 53 | risk1 = compute_rel_riks(downtown_risks) %>% mutate(Years = 1) 54 | downtown_risks =read_csv(paste0("downtown_risks", "_forest_years_halfyear.csv"))[, -1] 55 | risk0.5 = compute_rel_riks(downtown_risks) %>% mutate(Years = 0.5) 56 | pp = bind_rows(risk2, risk1,risk0.5, risk1.5) %>% 57 | mutate(Method = factor(Method, levels = c("StochOptForest\n(apx-risk)","StochOptForest\n(apx-soln)", 58 | "GenRandForest\n(modified)", "RandForest", "RandSplitForest"))) %>% 59 | mutate(Years = factor(Years)) %>% 60 | filter(!((Method == "StochOptForest\n(apx-risk)") & (Constraint == "no"))) %>% 61 | filter(!((Method == "StochOptForest\n(apx-soln)") & (Constraint == "no"))) %>% 62 | ggplot(aes(x = Years, y = rel_risk, 63 | fill = Method, linetype = Constraint)) + 64 | scale_fill_manual(values = 65 | c("StochOptForest\n(apx-risk)" = colors[["StochOptForest\n(apx-risk)"]], 66 | "StochOptForest\n(apx-soln)" = colors[["StochOptForest\n(apx-soln)"]], 67 | "GenRandForest\n(modified)" = colors[["GenRandForest"]], 68 | "RandForest" = colors[["RandForest"]], 69 | "RandSplitForest" = colors[["RandSplitForest"]])) + 70 | geom_boxplot() + 71 | ylab("% of Realized Improvement") + 72 | xlab("Sample Size (Years)") 73 | ``` 74 | 75 | ```{r} 76 | pp 77 | ``` 78 | 79 | ```{r} 80 | ggsave("../../paper/fig/improvement.pdf", 81 | plot = pp, height = 4.5, width = 10) 82 | ``` 83 | 
-------------------------------------------------------------------------------- /uber/b_downtwon_1221to1256.csv: -------------------------------------------------------------------------------- 1 | b 2 | -1 3 | 0 4 | 0 5 | 0 6 | 0 7 | 0 8 | 0 9 | 0 10 | 0 11 | 0 12 | 0 13 | 0 14 | 0 15 | 0 16 | 0 17 | 0 18 | 0 19 | 0 20 | 0 21 | 0 22 | 0 23 | 0 24 | 0 25 | 0 26 | 0 27 | 0 28 | 0 29 | 0 30 | 0 31 | 0 32 | 0 33 | 0 34 | 0 35 | 0 36 | 0 37 | 0 38 | 0 39 | 1 40 | 0 41 | 0 42 | 0 43 | 0 44 | 0 45 | 0 46 | 0 47 | -------------------------------------------------------------------------------- /uber/data_downloading.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(lubridate) 3 | library(riem) 4 | 5 | make_small_matrix <- function(x, y){ 6 | XX = rep(x, length(y)) 7 | YY = y 8 | temp = cbind(XX, YY) 9 | temp 10 | } 11 | 12 | edges = list() 13 | edges[["1221"]] = make_small_matrix(1221, c(1222, 1220)) 14 | edges[["1222"]] = make_small_matrix(1222, c(1220)) 15 | edges[["1220"]] = make_small_matrix(1220 , c(1230, 1223, 1224, 1390)) 16 | edges[["1230"]] = make_small_matrix(1230 , c(1223, 1229, 1228, 1232, 1235)) 17 | edges[["1223"]] = make_small_matrix(1223 , c(1224, 1229)) 18 | edges[["1224"]] = make_small_matrix(1224 , c(1390, 1229)) 19 | edges[["1390"]] = make_small_matrix(1390 , c(1228, 1234, 1380)) 20 | edges[["1229"]] = make_small_matrix(1229 , c(1228)) 21 | edges[["1228"]] = make_small_matrix(1228 , c(1234, 1232, 1233)) 22 | edges[["1234"]] = make_small_matrix(1234 , c(1380, 1233)) 23 | edges[["1380"]] = make_small_matrix(1380 , c(1382)) 24 | edges[["1232"]] = make_small_matrix(1232 , c(1233, 1254)) 25 | edges[["1233"]] = make_small_matrix(1233 , c(1380, 1254, 1255, 1263)) 26 | 27 | edges[["1235"]] = make_small_matrix(1235 , c(1254, 1237)) 28 | edges[["1254"]] = make_small_matrix(1254 , c(1255, 1252, 1251)) 29 | edges[["1255"]] = make_small_matrix(1255 , c(1263, 1258)) 30 | edges[["1263"]] = 
make_small_matrix(1263 , c(1382, 1260, 1262)) 31 | edges[["1382"]] = make_small_matrix(1382 , c(1384)) 32 | 33 | edges[["1237"]] = make_small_matrix(1237 , c(1252, 1236, 1239)) 34 | edges[["1252"]] = make_small_matrix(1252 , c(1251, 1253)) 35 | edges[["1251"]] = make_small_matrix(1251 , c(1255, 1250, 1248)) 36 | edges[["1236"]] = make_small_matrix(1236 , c(1253, 1238)) 37 | edges[["1253"]] = make_small_matrix(1253 , c(1250, 1251)) 38 | 39 | edges[["1239"]] = make_small_matrix(1239 , c(1238, 1240)) 40 | edges[["1238"]] = make_small_matrix(1238 , c(1250, 1249, 1241)) 41 | edges[["1250"]] = make_small_matrix(1250 , c(1248, 1249)) 42 | edges[["1249"]] = make_small_matrix(1249 , c(1257, 1246)) 43 | edges[["1248"]] = make_small_matrix(1248 , c(1258, 1249)) 44 | edges[["1258"]] = make_small_matrix(1258 , c(1260, 1257)) 45 | edges[["1257"]] = make_small_matrix(1257 , c(1260, 1256)) 46 | edges[["1260"]] = make_small_matrix(1260 , c(1262, 1259)) 47 | edges[["1262"]] = make_small_matrix(1262 , c(1384, 1261)) 48 | edges[["1384"]] = make_small_matrix(1384 , c(1383)) 49 | 50 | edges[["1240"]] = make_small_matrix(1240 , c(1241, 1243)) 51 | edges[["1241"]] = make_small_matrix(1241 , c(1246, 1247, 1243)) 52 | edges[["1246"]] = make_small_matrix(1246 , c(1256, 1247)) 53 | edges[["1247"]] = make_small_matrix(1247 , c(1256, 1245)) 54 | edges[["1256"]] = make_small_matrix(1256 , c(1259)) 55 | edges[["1259"]] = make_small_matrix(1259 , c(1261)) 56 | edges[["1261"]] = make_small_matrix(1261 , c(1383)) 57 | 58 | edges[["1243"]] = make_small_matrix(1243 , c(1245, 1244, 1242)) 59 | edges[["1245"]] = make_small_matrix(1245 , c(1256, 1244)) 60 | edges[["1242"]] = make_small_matrix(1242 , c(1244)) 61 | edges = do.call(rbind, edges) 62 | 63 | unique_edges = unique(c(edges)) 64 | census = rep(0, length(unique_edges)) 65 | names(census) = unique_edges 66 | census["1221"] = 206032 67 | census["1222"] = 206050 68 | census["1220"] = 206031 69 | census["1230"] = 207400 70 | census["1223"] = 206200 71 
| census["1224"] = 206300 72 | census["1390"] = 226002 73 | census["1229"] = 207302 74 | census["1228"] = 207301 75 | census["1234"] = 207900 76 | census["1380"] = 224010 77 | census["1232"] = 207502 78 | census["1233"] = 207710 79 | census["1235"] = 208000 80 | census["1254"] = 209200 81 | census["1255"] = 209300 82 | census["1263"] = 210010 83 | census["1382"] = 224200 84 | census["1237"] = 208302 85 | census["1252"] = 209103 86 | census["1251"] = 209102 87 | census["1236"] = 208301 88 | census["1253"] = 209104 89 | census["1239"] = 208402 90 | census["1238"] = 208401 91 | census["1250"] = 208904 92 | census["1249"] = 208903 93 | census["1248"] = 208902 94 | census["1258"] = 209403 95 | census["1257"] = 209402 96 | census["1260"] = 209520 97 | census["1262"] = 209820 98 | census["1384"] = 224320 99 | census["1240"] = 208501 100 | census["1241"] = 208502 101 | census["1246"] = 208801 102 | census["1247"] = 208802 103 | census["1256"] = 209401 104 | census["1259"] = 209510 105 | census["1261"] = 209810 106 | census["1243"] = 208620 107 | census["1245"] = 208720 108 | census["1242"] = 208610 109 | census["1383"] = 224310 110 | census["1244"] = 208710 111 | 112 | city="los_angeles" 113 | sd_dates = c('2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01', 114 | "2018-01-01", "2018-04-01", "2018-07-01", "2018-10-01") 115 | ed_dates = c('2019-03-31', '2019-06-30', '2019-09-30', '2019-12-31', 116 | '2018-03-31', '2018-06-30', '2018-09-30', '2018-12-31') 117 | coordinates = "&lat.=33.9978718&lng.=-118.4798331&z.=11.96" 118 | 119 | links = vector("list", (length(sd_dates))) 120 | for (i in 1:(length(sd_dates))){ 121 | sd = sd_dates[i] 122 | ed = ed_dates[i] 123 | links[[i]] = vector("list", nrow(edges)) 124 | for (j in 1:nrow(edges)){ 125 | origin_code = edges[j, 1] 126 | des_code = edges[j, 2] 127 | 128 | links[[i]][[j]] = paste0('https://movement.uber.com/explore/', city, 129 | '/travel-times/query?si=', origin_code, 130 | '&ti=', des_code, 131 | 
'&ag=censustracts&dt[tpb]=ALL_DAY&dt[wd;]=1,2,3,4,5,6,7&dt[dr][sd]=', sd, 132 | '&dt[dr][ed]=', ed, '&cd=&sa;=&sdn=&ta;=&tdn=', coordinates, '&lang=en-US') 133 | } 134 | } 135 | file_names = vector("list", (length(sd_dates))) 136 | for (i in 1:(length(sd_dates))){ 137 | sd = sd_dates[i] 138 | ed = ed_dates[i] 139 | file_names[[i]] = vector("list", nrow(edges)) 140 | for (j in 1:nrow(edges)){ 141 | origin_code = edges[j, 1] 142 | des_code = edges[j, 2] 143 | 144 | file_names[[i]][[j]] = paste0("origin", origin_code, 145 | "_des", des_code, 146 | "_sd", sd, "_ed", ed, ".csv") 147 | } 148 | } 149 | 150 | i = 1 151 | j = 1 152 | origin_code =edges[j, 1] 153 | des_code = edges[j, 2] 154 | sd = sd_dates[i] 155 | ed = ed_dates[i] 156 | cat("i ", i, "; j ", j) 157 | cat("origin id ", origin_code, "; destination id ", des_code) 158 | cat("origin census ", census[as.character(origin_code)], 159 | "; destination census", census[as.character(des_code)]) 160 | cat("starting date ", sd, "; ending date ", ed) 161 | browseURL(links[[i]][[j]]) 162 | 163 | file.rename("~/Downloads/Travel_Times_Daily.csv", paste0("~/Downloads/", file_names[[i]][[j]])) 164 | nrow(read_csv(paste0("~/Downloads/", file_names[[i]][[j]]))) 165 | 166 | 167 | ######## generate_A_and_b 168 | id = unique(c(edges)) 169 | A = matrix(0, nrow = length(id), ncol = nrow(edges)) 170 | source = 1221 171 | end = 1256 172 | A[which(id == source), edges[, 1] == source] = -1 173 | A[which(id == end), edges[, 2] == end] = 1 174 | for (k in 1:length(id)){ 175 | if ((id[k] != source) & (id[k] != end)){ 176 | A[k, edges[, 2] == id[k]] = 1 177 | A[k, edges[, 1] == id[k]] = -1 178 | } 179 | } 180 | b = rep(0, length(id)) 181 | b[which(id == source)] = -1 182 | b[which(id == end)] = 1 183 | write_csv(as.data.frame(A), "A_downtwon_1221to1256.csv") 184 | write_csv(as.data.frame(b), "b_downtwon_1221to1256.csv") 185 | -------------------------------------------------------------------------------- 
/uber/downtown_risks_forest_years_halfyear.csv: -------------------------------------------------------------------------------- 1 | ,rf_rf,rf_random,grf,rf_approx_sol,rf_approx_risk,rf_approx_sol_unconstr,rf_approx_risk_unconstr,vanilla,oracle 2 | 0,1556.2657884247833,1554.0220472151289,1553.5004955615993,1555.6333603178446,1554.0079894613718,1556.6672817661201,1556.0510127782666,1557.8249103453727,1519.0632089384058 3 | 0,1583.2038267960447,1575.2175739991003,1571.4777120036038,1574.9779178997062,1575.1533657912744,1573.6310848081282,1573.69617806896,1574.1442051483364,1538.5825622192858 4 | 0,1553.9089299157852,1569.5651552342713,1558.6120307177682,1555.006673520283,1554.302137035916,1558.6657315575846,1560.7313496853203,1559.0525082873157,1516.0728751158385 5 | 0,1540.6876785345032,1549.8489085685162,1547.769572468231,1541.0763114661368,1544.8283972241102,1545.705327165351,1547.9959899163537,1542.3323355545774,1502.5312324199476 6 | 0,1559.4328294815034,1568.5736386539907,1565.6698305285531,1557.7730560329799,1560.0937526604441,1561.324989894696,1565.3696533966927,1563.0221338094682,1523.532425807642 7 | 0,1551.4218493193298,1557.8616923254385,1552.3943274616258,1551.6517242086259,1552.959413400799,1553.7650582599206,1553.0793611751592,1558.243241205606,1517.6354288111254 8 | 0,1554.4192805024798,1554.0332334432144,1552.781909285234,1550.770620034135,1553.8822678123445,1553.1802101542,1552.8267667810144,1553.7308897664027,1511.0710615165285 9 | 0,1573.1731729462695,1580.013813211253,1570.3253572076542,1563.6486632234387,1574.020212384487,1573.1564494322595,1571.2409436919665,1566.8603933728002,1524.498003437736 10 | 0,1551.674029945463,1551.8118441864685,1548.257896601583,1551.2526652658619,1548.0738057372557,1548.2918139791643,1549.0451488551557,1548.6986423906196,1509.9549442592563 11 | 0,1571.2989726038986,1586.7591318618083,1572.930779631815,1569.8861666160312,1567.4912171811836,1570.5886113674715,1571.9944397314953,1571.6030505214676,1529.2484448931468 12 
| 0,1565.1805951023557,1565.3810765106377,1558.7175049949938,1559.4152379178036,1559.3273531596465,1559.5958591961216,1562.9022188553895,1563.5742508209908,1522.3650183951868 13 | 0,1560.8119306587867,1568.917498972182,1569.2308220840923,1560.2405269318501,1562.2286292321278,1564.3759701310994,1566.1720218557566,1558.544528893148,1518.7793462177885 14 | 0,1559.0847787874154,1567.9026160146534,1562.9754479260193,1555.9198898050267,1559.093291609595,1557.8084098343318,1565.7535675801328,1560.9229187519102,1520.8314245053775 15 | 0,1556.467135015713,1557.692183873778,1552.2960116840648,1554.0804824976733,1550.8709016628572,1552.071533259634,1554.045009131599,1556.2006977760313,1518.9692513094687 16 | 0,1547.5587848309913,1557.5722836067619,1548.4671539943852,1549.6890536743956,1548.3060696732532,1551.0790825264257,1551.345839018378,1549.990642264691,1512.898073162353 17 | 0,1577.888143880843,1583.4342096131431,1576.8890455676822,1575.2112502205368,1576.7936886860566,1576.77277844312,1580.5196721746556,1581.9070371955816,1533.737792505454 18 | 0,1555.2907293786948,1558.4971730186803,1551.440234471026,1554.383852098699,1551.8398211284525,1552.8040616682376,1554.9545055599874,1557.3734936686328,1512.7797558685595 19 | 0,1561.9985025014694,1563.022615010401,1561.4601594531737,1556.4261170133345,1560.9313372639592,1558.14957966507,1559.2679543002857,1557.3914237199463,1521.699183056066 20 | 0,1548.8823148161484,1557.4007040558456,1549.6261430581258,1549.1357003165788,1550.1168251020367,1552.08685939605,1551.8799256533318,1557.9056411666559,1515.0950831434716 21 | 0,1558.5435157964102,1561.6763831283731,1556.1630333038893,1556.0329991599458,1556.504400473128,1556.0777511191438,1557.1136412758879,1559.0471279180103,1518.7250471934726 22 | 0,1564.2598210004096,1560.6320523437507,1558.8905774543298,1561.147285237714,1557.3627328456284,1561.035485583835,1559.183238843398,1563.3263922193587,1520.4485358167556 23 | 
0,1549.016934375374,1566.4311509398044,1552.7353742004766,1548.978742844179,1552.5394832236236,1554.4121312164605,1558.6267339154365,1552.6656723750982,1513.6672241164126 24 | 0,1537.6818025087396,1544.2691411965075,1539.8833278083816,1537.0672170996434,1538.5502419048191,1537.7954809118805,1538.1978679246756,1540.3976458492657,1504.8964968066991 25 | 0,1553.2633303654648,1564.5655692113978,1556.8849270328963,1550.9068805178588,1553.9837696819845,1555.1298600817124,1556.3280476994478,1557.3564844159168,1513.9880768859243 26 | 0,1545.5096199310615,1541.7695327727022,1541.576789246472,1539.0995355930904,1541.9437887461788,1538.7342350899144,1537.815452170453,1539.9243077225112,1504.3107388809647 27 | 0,1561.6895629896876,1558.1220514123763,1553.1415471854373,1553.0681988578194,1556.3788068327972,1553.2616593470475,1553.0125175763544,1553.724853443852,1516.1963035720107 28 | 0,1551.7238495997062,1553.8780505952873,1551.5360312273604,1549.9999111980187,1551.626469760043,1553.8375554635513,1555.4167703346502,1553.4807932918823,1514.0004629312277 29 | 0,1564.1009190290338,1566.3961702618383,1564.9969650040275,1562.4263573089563,1563.0943820922275,1561.5975884175273,1564.5804947429651,1550.4798656028588,1512.2090293491858 30 | 0,1536.5646204155896,1542.5514019594682,1542.1292339321083,1539.831040534467,1539.8287322077867,1541.9265620616088,1540.5436437048206,1543.9098148849398,1508.3093391749142 31 | 0,1556.4879888558435,1561.6710613428513,1557.5006695598527,1558.5944286571964,1557.6921920161394,1559.2034243310063,1559.9499533921535,1562.3759067188312,1519.6756046148903 32 | 0,1563.4925039173954,1564.0878594728467,1560.32915210482,1559.814936470416,1559.0696250715885,1560.0608785913043,1559.8398145506837,1562.636027511366,1522.7492059338433 33 | 0,1533.032218204873,1530.8267559044032,1536.1223248337224,1526.8702032960266,1535.691292353344,1530.6195566280614,1528.3943771921674,1527.792576872686,1491.7100147048493 34 | 
0,1571.9974698230985,1568.1435575553878,1575.2592182670903,1564.105382338173,1571.6329502621668,1566.3106855578014,1569.6343949401898,1560.2599646898454,1521.2486074709414 35 | 0,1539.0643662926084,1541.662875190113,1541.411317692153,1539.2064510312434,1540.3821018000554,1540.2261892540441,1540.511579397636,1542.9665794163082,1506.4923163069495 36 | 0,1568.4747762898412,1580.8543385238652,1569.2608586021688,1565.4614071170988,1564.4422195429302,1568.0806343880836,1573.2747124500152,1566.9375044928654,1528.566577785199 37 | 0,1551.7224219013785,1566.4345641042473,1551.721439592645,1553.7799664484523,1553.3059710785665,1554.2997439674937,1558.6116155659122,1554.293027692699,1516.0559870826835 38 | 0,1546.973106918154,1549.0916230059568,1546.580536899617,1542.394493019367,1548.8997385957607,1544.7999576925827,1547.9088865735164,1547.8641058934543,1507.6251684481908 39 | 0,1564.3042172638857,1570.1019789359148,1565.072091483414,1560.8278487390494,1562.680172858553,1565.3825484000506,1562.618406042647,1561.0115394672157,1519.1692542825976 40 | 0,1548.4790674831506,1557.9939641457277,1551.497903634053,1550.0004120891317,1550.3240446069904,1552.515521455715,1553.265471853131,1553.2392497229544,1510.2798453398386 41 | 0,1540.750785663155,1547.2548532596163,1542.3737371618113,1540.8217207225855,1543.2150868842355,1544.6408206450885,1542.954552580943,1544.886958687577,1502.9079874839467 42 | 0,1551.7262859233963,1556.0696991015043,1556.040378677825,1553.5225194415852,1554.07169431849,1555.4100469726852,1552.8395252092823,1553.9890674033736,1512.8410461285605 43 | 0,1562.7840295159142,1564.3653154088004,1561.7729373711925,1562.2419698490578,1562.1144937263816,1562.9227081780366,1564.6038443080308,1563.4993740866255,1525.059111407022 44 | 0,1555.3050022290533,1560.758032385601,1556.413920527554,1557.0754061684881,1557.4094439392545,1554.6414100838488,1556.4355444981452,1558.8422774307708,1518.5254672409678 45 | 
0,1561.8148351971256,1564.652829401245,1565.9149275343775,1559.110724057111,1560.4930222010137,1561.8252610551358,1561.5069670642256,1561.00169414949,1520.7719291603244 46 | 0,1557.55081202388,1565.082421817996,1554.4350269555587,1556.4971591970607,1553.0376361637993,1555.4667871087638,1556.8916228345004,1549.4964067675173,1515.1858864601488 47 | 0,1547.5592899306823,1551.6317521828435,1549.0597154304876,1547.5415493539974,1547.8019652162466,1549.1196677736962,1549.3400922470767,1549.9455891425039,1511.674408527385 48 | 0,1572.2427112083692,1576.1217224698948,1576.5868743887827,1572.3221840071467,1575.6749795552132,1573.5398480096962,1574.716819896338,1574.1433826308373,1532.3240764028985 49 | 0,1561.7467936632856,1560.1841623494558,1562.3398018347768,1557.9678539567083,1562.68494813373,1560.3311356450715,1558.7974949436227,1562.0063415853876,1521.4209591732938 50 | 0,1564.357280213687,1565.928778940794,1558.893195991901,1560.6222643074448,1558.437885180673,1561.6109290474747,1560.6834966127,1565.0019694564032,1522.0466642026108 51 | 0,1551.6196418466602,1551.5979401196453,1546.9858456773593,1546.7347505315308,1546.3551085310405,1544.735440407784,1547.4982168044728,1545.1452327990687,1512.7771600416525 52 | -------------------------------------------------------------------------------- /uber/downtown_risks_forest_years_halfyear.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/uber/downtown_risks_forest_years_halfyear.pkl -------------------------------------------------------------------------------- /uber/downtown_risks_forest_years_onehalfyear.csv: -------------------------------------------------------------------------------- 1 | ,rf_rf,rf_random,grf,rf_approx_sol,rf_approx_risk,rf_approx_sol_unconstr,rf_approx_risk_unconstr,vanilla,oracle 2 | 
0,1556.6709784205707,1560.0283553212564,1555.2218014951407,1555.5374477302285,1556.2841729802476,1556.7792983910576,1555.7913198489136,1560.269946008092,1522.3415011167906 3 | 0,1558.3361877490534,1561.112091591707,1557.2567485326697,1556.8085481334253,1555.7773634944062,1556.8007894302275,1556.8220724243029,1560.27402535442,1523.9241506754652 4 | 0,1562.0948927436132,1561.515531479281,1558.7134290859028,1558.1062208933427,1558.7559727758676,1559.0271694618414,1559.9440356506677,1566.5885048952102,1527.916949666284 5 | 0,1558.6606620410457,1560.033760701787,1556.8884725193977,1556.3076517092702,1556.9317869406434,1555.9474807452198,1556.3467767246323,1561.6757300968095,1525.0303726203383 6 | 0,1550.061205110062,1549.917442888917,1548.6288548409293,1548.2784087281386,1545.8758351635522,1547.6002300062648,1547.6733508882573,1550.8941023457137,1515.2539166104964 7 | 0,1547.5911925493808,1549.7827751141403,1546.3334332817426,1545.6933030391524,1545.1462772980615,1544.6656552179038,1545.1713849062571,1549.2976575519012,1511.7777886865674 8 | 0,1550.0808845570236,1557.7504390099416,1549.7920282069492,1548.2175410551963,1548.5511081179302,1550.1961655659384,1552.5062508216893,1550.9100717645042,1516.0606137985044 9 | 0,1562.3522102531701,1562.3731865275413,1561.7832311820293,1561.8866001515203,1561.3345818686503,1561.340540613024,1560.8116658806855,1565.1454784017913,1525.9158736349668 10 | 0,1552.2632268823006,1554.4905352727803,1552.6317569325938,1551.648326240133,1552.0401767312728,1551.8935718404978,1552.4352473237482,1557.0400365541495,1520.3601491875972 11 | 0,1555.2154969231808,1555.7938430309268,1553.6945803326112,1553.195043877979,1552.6775420901138,1551.5739506659907,1552.617821051328,1555.7754417752496,1520.6577476703817 12 | 0,1554.602367842642,1557.1089899279289,1553.3153622287307,1550.3508225851706,1552.5165021114824,1551.5371362122821,1552.8173742479125,1556.4624151446721,1518.9506069219171 13 | 
0,1567.1252551300333,1565.514910054159,1563.7855771894513,1563.0505093347324,1563.7412599095107,1563.1483789575861,1563.9580269012292,1566.9038044812532,1527.8886166777977 14 | 0,1550.680334783631,1553.8523874901775,1547.7511450896973,1549.5885215073213,1547.7394512833814,1547.2401490020543,1549.1770582073389,1551.9289627394746,1516.7990721173069 15 | 0,1559.2413512178941,1567.3440207447284,1559.2053191714556,1558.358388164667,1558.731722103171,1559.221742504577,1561.7431946750382,1562.0157791327103,1520.828240325676 16 | 0,1554.8469041988958,1552.3798454816667,1553.1309648265797,1550.7207665413591,1552.1772254447933,1550.4511958780636,1550.344510992294,1556.9433212683673,1520.073788637082 17 | 0,1554.298121155734,1553.799700217088,1551.8150678626841,1551.3003502758615,1552.199812047447,1551.1736383809168,1552.3658085178192,1559.4146869562464,1522.3918927288053 18 | 0,1559.4219918529802,1566.5002750176066,1559.9917833049267,1558.1148255962544,1558.4174633444281,1559.0819606661728,1560.1524955081836,1560.5211081486038,1525.2688264402143 19 | 0,1549.419544339912,1551.0055387472355,1547.9999971413347,1549.4542170269387,1548.3268924663023,1548.516511420525,1548.7253828807554,1550.9195981249666,1517.154144916388 20 | 0,1558.3311169282122,1559.0990374027363,1557.2901788913432,1557.7507868156795,1558.5358153890475,1557.5396763573888,1557.993246032936,1561.4197094826122,1522.4011934998025 21 | 0,1547.8176597556292,1553.7226041507474,1548.0298471145672,1546.547810263076,1547.6173626589077,1546.5823910098268,1547.9220734235548,1545.892663955544,1511.7540994307856 22 | 0,1559.5841813250677,1558.5669512980503,1556.90969348365,1556.489945320418,1555.8671511106645,1556.3514133867336,1556.936628811809,1560.7497138649396,1523.5260816626098 23 | 0,1559.5314448539968,1559.7881496207467,1555.7725251205416,1556.3183491956872,1556.1320255751373,1556.369533478023,1557.1296415344332,1561.9100865675184,1524.90421949706 24 | 
0,1557.9438787205147,1555.7835138424991,1554.6436457712093,1553.8630131516613,1555.3586794552727,1553.4588235747192,1552.4166018601888,1559.070201038532,1523.0918795717525 25 | 0,1548.530922360339,1550.5453353174478,1548.5019870310282,1546.565663209681,1547.270482922892,1546.291929303725,1547.0948038004988,1553.4817084161107,1513.2615072355923 26 | 0,1544.3092332000365,1545.4777871543183,1540.9213612445737,1540.5365616894724,1542.8387864943834,1539.5027868374882,1539.9443205297773,1544.6917467813978,1510.0231057371377 27 | 0,1556.0602444715162,1561.9464041335928,1556.296504844302,1556.618379251476,1556.4045272050541,1556.097008862393,1556.9279983146275,1560.51723348609,1522.5923768015316 28 | 0,1551.8268342751983,1556.4508549445138,1548.735419586012,1548.930868523185,1549.5896869377418,1549.042533689581,1549.9497547723893,1551.8107477016747,1517.471344197901 29 | 0,1549.0475534791758,1552.8192653875215,1551.8007115176085,1551.5511167383986,1550.0879561945637,1550.4684099908795,1550.9274030303889,1558.298770920494,1517.436888109078 30 | 0,1560.5577219472084,1562.305914715679,1561.5113413488011,1559.6292772056684,1559.5637399708469,1559.4273276459076,1561.0909937913523,1567.463767278534,1527.1315798430614 31 | 0,1558.1758034502882,1553.9239437085398,1555.648748069507,1553.9388212451288,1554.3777898193482,1552.0826716686984,1551.6389107544428,1559.0701721793075,1520.1389992925326 32 | 0,1553.4350970116197,1560.1158767272514,1551.3348340876942,1552.6714156408545,1550.6108947344549,1552.3686848446976,1553.2553763122908,1552.5265186421698,1518.1382194891823 33 | 0,1560.4923076612267,1556.7053419177134,1554.1165449504044,1555.3099302128096,1553.6627703228678,1552.7000567300881,1554.0880455960157,1556.0739635881928,1521.393101909482 34 | 0,1543.700782388528,1545.4184462381693,1540.9317430708943,1541.267056776982,1541.0472683050425,1539.6082407914032,1541.3954593625504,1542.6796317687,1510.2158962595256 35 | 
0,1552.2172833256095,1553.3046402072553,1550.8842145308147,1549.3186828481641,1549.9115024810794,1549.7454933776655,1550.8680633854128,1555.1935001741733,1516.8721646530767 36 | 0,1555.1779525613815,1555.9439850391054,1552.8032587554985,1553.808924759853,1553.5195008062653,1553.1466589409297,1552.656568900413,1556.2150068275184,1520.5613167911397 37 | 0,1553.3456940356825,1553.946873846231,1553.3855256217366,1553.194808534067,1552.901601709287,1551.0145666807714,1551.84679062979,1556.6747662425116,1520.2266368823427 38 | 0,1552.061923546901,1549.8832475722527,1549.6579718646594,1551.1137176977566,1551.105085793468,1547.7872986888217,1548.0689829817206,1552.2061279663733,1517.1459918138337 39 | 0,1555.564997099227,1553.5697264090693,1552.4799835025601,1551.1853691119018,1550.818954435698,1549.8289462286918,1550.4119642764283,1553.5535807652884,1518.271610650716 40 | 0,1552.2292777201017,1556.2845336774383,1550.8963830519367,1551.7567639485885,1550.286585778154,1549.6327826902875,1551.3869724505184,1550.5047665202756,1516.3767252744221 41 | 0,1557.33528730576,1557.9852779797654,1553.7801774504846,1554.55195980465,1552.133593593809,1551.775491572601,1553.449708338014,1554.152407762263,1522.5079422525507 42 | 0,1562.188586507257,1564.2533322168185,1560.3170996178221,1560.0708418627282,1560.5275371246107,1560.7164431795086,1561.318982981902,1564.9594567418244,1527.7201782461586 43 | 0,1553.2007111499954,1557.0853396666255,1551.201192350057,1551.9447032839794,1550.7126486556963,1550.2262043258752,1551.4158906645737,1550.959907304402,1515.9897199668392 44 | 0,1555.7285109389236,1557.3697171694216,1555.9484767275007,1555.568175581694,1555.366725690569,1555.1878826387062,1554.2764815669814,1560.0327124199346,1520.4796251430828 45 | 0,1559.25361738426,1564.1470292127765,1560.1098169405007,1560.1175327816568,1559.3211796247115,1559.0931411234046,1559.7942442370318,1564.3151068215675,1526.3922978628768 46 | 
0,1560.9036850509535,1562.144938835169,1558.582322348638,1559.7969855477152,1559.2133709423429,1556.1329253708834,1558.8842316423243,1555.9473238789512,1521.173640905819 47 | 0,1555.1483464634225,1555.6432975486632,1554.6126784027717,1553.8338671243207,1553.4324220606475,1551.2925367014855,1551.9236759317657,1555.0521683140735,1517.1509111686466 48 | 0,1552.1817898662919,1552.9721255725292,1551.7503946420597,1550.8209849675718,1552.3541029330863,1550.2747488750301,1550.8008263478523,1555.83321329905,1518.909486133302 49 | 0,1556.7728460636488,1562.9570049439262,1554.4661432967205,1554.0788269068191,1555.09475055604,1553.9389801940988,1556.4334855761638,1556.482721552672,1522.0500008041383 50 | 0,1557.5595203200935,1556.364791457772,1554.5698768891086,1553.8596783628748,1556.0827449476878,1553.3705383040751,1552.8308099257977,1558.6605981294379,1520.7358665712054 51 | 0,1546.4672308680179,1546.7081518610448,1545.2317845497012,1544.2532524765306,1544.409720239834,1544.3604712484034,1544.4612747219014,1549.3077039601594,1513.8789264384404 52 | -------------------------------------------------------------------------------- /uber/downtown_risks_forest_years_onehalfyear.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/uber/downtown_risks_forest_years_onehalfyear.pkl -------------------------------------------------------------------------------- /uber/downtown_risks_forest_years_oneyear.csv: -------------------------------------------------------------------------------- 1 | ,rf_rf,rf_random,grf,rf_approx_sol,rf_approx_risk,rf_approx_sol_unconstr,rf_approx_risk_unconstr,vanilla,oracle 2 | 0,1560.9493272617296,1567.8989647672192,1562.5704495278417,1561.651823160486,1561.4936038494984,1561.3434068573208,1564.966547040373,1561.5879083560517,1521.9604738048224 3 | 
0,1542.78635560207,1541.3015239569954,1540.7091490722803,1539.51391439188,1541.408857327909,1539.7908790788988,1540.4803822432903,1540.0574160949566,1506.0939031808455 4 | 0,1557.1982736662196,1560.5094966560127,1556.9495467214442,1556.1851193375978,1556.3066594828572,1557.165368435239,1556.785983546574,1560.362911371737,1520.5020583244893 5 | 0,1556.333356613662,1563.79781665456,1558.6941808988222,1556.1724641205324,1556.4933745788028,1557.1344729536918,1558.5200876229458,1557.2319028604813,1518.9243847059606 6 | 0,1551.9782786697203,1554.6767769278533,1550.48985524543,1550.94330372752,1550.8953742048293,1550.9290887072304,1551.7165066509776,1552.8553093155226,1514.8393644686341 7 | 0,1554.9152862458295,1557.519111903884,1555.0030603825974,1554.4828646689136,1555.0679184953165,1555.6112515960044,1555.4018599827261,1556.116788131346,1518.0060424654278 8 | 0,1541.8766474442946,1545.0988960512937,1542.8001054851281,1541.6840304028856,1542.082278591774,1542.4270799277294,1541.7702739320762,1546.2195850110104,1509.5224537161605 9 | 0,1563.7000444535695,1563.928087976408,1561.8789615111816,1560.4122655710778,1560.935390361832,1560.8513219556214,1561.2840529995453,1563.0528233455527,1526.4441756001797 10 | 0,1552.1278725936015,1553.3565589233626,1552.8868763312748,1550.095035024965,1550.7748362687862,1550.2531970346972,1550.1286308587282,1553.1077888469254,1515.632573236322 11 | 0,1541.1127830870375,1542.0311956750647,1540.211891379508,1539.9547959053048,1541.5248765126871,1539.6663804992688,1539.2419621524812,1540.6044899891062,1505.5428979901671 12 | 0,1566.6301690844366,1571.8343544547781,1567.2198883305157,1565.3777786822661,1564.6913628740965,1565.8213100768376,1568.3627108366882,1565.9273397220522,1526.5606612718786 13 | 0,1557.5328100660581,1558.492795981041,1555.4340115125349,1555.0094357854912,1556.5846849731277,1554.8582420341452,1555.9266089194766,1557.8241998973854,1520.173713272346 14 | 
0,1557.492562098553,1559.3989701812677,1555.8387738918948,1556.402698199945,1555.5730714296485,1556.6617812397967,1556.2101999295219,1558.438192000259,1518.2255162915426 15 | 0,1560.055372093591,1566.5526920848256,1560.3247812732216,1558.3827711097847,1557.688572191394,1558.372249746355,1558.720040631224,1560.670201387546,1522.5381319389353 16 | 0,1559.480275587299,1560.2324554808897,1558.131324586175,1557.3952305367327,1557.151830228245,1555.5553129275145,1556.341075939629,1558.388198062888,1517.8589146479953 17 | 0,1544.0502427057006,1546.5417651471555,1543.734443547686,1542.9125875828881,1543.249530719103,1542.1661442271368,1544.7821138794075,1548.0292525281877,1508.481607457784 18 | 0,1554.0803667028758,1557.1775235264513,1553.9733728763397,1552.8794705528228,1553.1711700556655,1554.9162994676315,1553.6822888032098,1557.7929694093646,1517.1002498823727 19 | 0,1532.394057220807,1537.8074994066471,1535.0698007389517,1532.6160228113592,1534.5557248557864,1533.689847881207,1533.2277022888272,1535.5291233824025,1501.890573849362 20 | 0,1565.2819816126205,1563.663795552628,1562.7703889922352,1560.9421233312842,1562.380104584585,1557.9670849136187,1558.867422163898,1559.3398404662291,1520.1950641897456 21 | 0,1553.9028372042517,1557.6031079404602,1557.6138146858277,1554.9299837199487,1554.4075596805778,1554.9845934375373,1556.6131478321183,1557.4245924442891,1519.597773366189 22 | 0,1554.5712601558064,1560.8192023943839,1556.0207679784048,1553.432261103525,1555.324011097778,1553.2551878662582,1555.5292981164107,1549.2993486046325,1511.976315827339 23 | 0,1561.690362089311,1556.8021897618182,1557.977042803105,1556.5807271523688,1557.9074170138883,1555.8792842325,1555.323136298733,1557.2820924933492,1520.402484614173 24 | 0,1555.17719690324,1559.8482485103043,1557.443444350638,1555.4309297657815,1554.9415159785053,1555.543441147832,1555.8952574119464,1555.2720998906493,1516.6899728041196 25 | 
0,1557.3734467434692,1556.1679894073018,1555.3005695429881,1552.3214752899885,1554.9696499166075,1552.0921857809792,1553.947952183334,1554.98357479012,1518.396491121903 26 | 0,1552.748142892908,1556.8038332802012,1551.183924615048,1550.151422747896,1551.1768287605641,1550.5264672209728,1551.4683770909721,1552.7233325202121,1512.1551545359866 27 | 0,1560.332954553519,1562.902780708385,1562.6055386459439,1560.913388253869,1562.2303795209511,1560.4124111496724,1561.4737512604654,1563.2778988451655,1524.4493315745303 28 | 0,1550.3196799210848,1550.3341321652522,1549.7592409002268,1548.4473769030135,1548.8788448732223,1547.8852532816543,1548.6244660173807,1550.66028613323,1514.848605455081 29 | 0,1553.0796582843025,1557.5909615692149,1552.1222853492245,1551.4488415332626,1551.5984403320167,1549.64808068998,1552.6263955280062,1554.1383743704162,1518.5902194839612 30 | 0,1550.3453913982341,1549.2036155965318,1547.5009640070598,1549.1488001814519,1549.6722284634066,1545.5376760718286,1545.5782287184531,1548.2761480954287,1510.430723560741 31 | 0,1546.714346311901,1549.3242446561055,1549.0445435382794,1546.4908574480096,1546.6333165546055,1545.622719998179,1548.3466338775133,1548.0929745821281,1513.8196609389136 32 | 0,1558.4091226504902,1560.1132755545323,1559.0683318709582,1558.1571762563742,1558.187680495395,1559.0645160556955,1559.5714042423913,1559.7867216827558,1521.4627247589533 33 | 0,1545.8351153484764,1558.0126462455896,1545.718871326075,1544.8831922797142,1545.7764310827129,1545.8743005101824,1550.1659837035088,1547.7455436417522,1508.8323532095621 34 | 0,1556.4864632436263,1557.0764496238562,1555.4211473686423,1554.7900289792005,1554.9592824012877,1554.1177209191135,1555.6381150650489,1556.4340947504602,1519.8514730526426 35 | 0,1559.8450504022617,1559.0376084532502,1562.121619881426,1557.4128168372495,1559.4890566227987,1557.1486397422054,1556.2902430929596,1560.9702175526654,1522.3815666839266 36 | 
0,1560.203856264511,1559.21377388133,1558.804117702767,1557.4243196327002,1557.0360825901855,1557.3958974527893,1557.6828279967165,1559.7709139008948,1520.8019104534571 37 | 0,1559.0069118332422,1568.017294801444,1558.1396363611698,1558.0804967361967,1558.0305142703282,1557.2368073230225,1560.9391683721783,1556.1835966051874,1518.7241486326213 38 | 0,1555.5568681754537,1553.6347084472777,1555.596010482309,1554.986100425221,1556.0592321628562,1551.982811241966,1552.890098573187,1552.00695239805,1514.190685503774 39 | 0,1548.1503914188343,1553.5599090359253,1548.605664486367,1547.3644604651724,1547.9650315094673,1547.3047237486444,1546.6005743156302,1552.1278261934776,1513.9759462924699 40 | 0,1557.0239748193928,1560.3722487166533,1556.9959346005978,1557.195153228984,1555.8595768344655,1557.657639594259,1559.0679172180296,1558.236491685874,1519.3503553793798 41 | 0,1557.8909905864389,1553.4496631524698,1554.7444874719947,1553.918907802139,1556.0625767606177,1553.904279556949,1552.1944557856805,1554.9336757294625,1519.0028222316432 42 | 0,1575.2399580188157,1580.780469199261,1577.9304208625847,1576.8968413657433,1576.6069313463702,1577.4612194655342,1577.9629291207912,1577.6527972460092,1536.1923648078432 43 | 0,1556.446766482982,1557.6220838209765,1556.2226592356687,1556.0785538388507,1556.1328772308461,1555.4232659172376,1555.7663742000368,1557.1393374456395,1519.9302052811317 44 | 0,1557.6146465167205,1555.6429297323707,1555.5714807767479,1557.791578561824,1557.458407357889,1555.6589080696253,1555.6400596673932,1556.483442067582,1522.4680895643053 45 | 0,1563.8857126329303,1564.9462393743597,1564.3257506907023,1562.6619582132078,1564.0815253512037,1564.3443267828386,1563.3025258202574,1562.8211571013055,1525.4839393821678 46 | 0,1556.6454712791087,1561.8756256010975,1559.8513131276375,1555.9400838630918,1556.2136425538067,1556.2036800966118,1559.9107452184808,1559.2808027713575,1519.3590253108168 47 | 
0,1550.6582969222566,1555.7376718203648,1552.0573863054585,1547.0650147221397,1547.1367291028644,1549.6837638610584,1551.059654504041,1549.300873347706,1513.8158537064855 48 | 0,1557.702451145685,1558.646042877425,1557.236005715481,1557.7044372807763,1557.82118880469,1557.3173573130582,1556.5868967664728,1559.88043968334,1520.8637336660152 49 | 0,1558.9283040614766,1559.7476324545023,1560.8371203249615,1558.7939080688536,1559.0850498113377,1559.1936752871175,1560.245223864497,1562.9615307945526,1522.8490410655975 50 | 0,1561.8457939321581,1566.5301253727564,1563.812495504649,1561.3168893090174,1559.8517042818278,1559.628171881442,1561.6085686584602,1557.923619207985,1519.4245741422005 51 | 0,1546.2581738621907,1548.9104855367373,1547.059750165275,1545.531020557546,1546.7719803505975,1545.8407386960612,1545.841089215287,1547.704204650576,1510.4715516360523 52 | -------------------------------------------------------------------------------- /uber/downtown_risks_forest_years_oneyear.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/uber/downtown_risks_forest_years_oneyear.pkl -------------------------------------------------------------------------------- /uber/downtown_risks_forest_years_twoyear.csv: -------------------------------------------------------------------------------- 1 | ,rf_rf,rf_random,grf,rf_approx_sol,rf_approx_risk,rf_approx_sol_unconstr,rf_approx_risk_unconstr,vanilla,oracle 2 | 0,1545.0403655444595,1548.7779116617253,1544.7708964067863,1544.9824425090058,1544.6323560532462,1544.632881164672,1546.3319538127744,1548.61359231605,1514.7144844151587 3 | 0,1549.5193592953428,1553.0905330114947,1549.1141997561103,1549.0248107414782,1548.5317493071777,1549.05166506223,1550.01895675246,1553.6277265505032,1518.878979587809 4 | 
0,1546.3940551911137,1547.8209769400291,1545.5922451116803,1544.0000314023373,1544.5787890757404,1544.3334725180405,1544.7936847019812,1549.6622528987482,1515.4810253125709 5 | 0,1539.3476745469009,1541.43336451537,1536.4724521490275,1537.60521414058,1536.0656268365992,1537.124082309371,1538.9815308799116,1541.4730057280449,1509.8965502236579 6 | 0,1544.17674201202,1546.8222057604278,1544.2649035345114,1544.4886066520753,1543.8350943039895,1544.605385688091,1545.0864366977664,1551.1633151783465,1516.5507835830056 7 | 0,1548.657163307169,1551.322966746711,1548.197234029674,1546.7408269898056,1546.6375932860597,1547.4834370783433,1547.6620070241424,1552.2990938240878,1518.1288564022175 8 | 0,1543.8218513127185,1545.9383152133908,1543.2281679525004,1542.6385112047528,1542.8842057289353,1543.6023425740643,1544.2979159337824,1548.3521503905724,1514.6570304947993 9 | 0,1550.0416847253177,1551.243551495463,1549.737273527957,1549.2973943817076,1548.3859908596137,1549.4052332410026,1549.369821848666,1554.828002954339,1518.0047619365264 10 | 0,1535.2427739356253,1541.7254580154959,1534.3293442677307,1535.0871858571522,1534.1666526132774,1534.9420373243472,1536.787448390497,1537.8830387404748,1504.2872368248334 11 | 0,1550.8762580777488,1548.2610368668904,1547.8184412972223,1546.0331242470452,1547.0667625926349,1546.1369425744906,1546.9923621309022,1552.191180215317,1518.5965932425433 12 | 0,1546.9705930308762,1550.5627927366731,1546.6140208138431,1546.2318696471475,1546.5039660228672,1547.181797119778,1547.3867991467823,1552.279815648723,1515.7762109857738 13 | 0,1547.110873627511,1548.9347174683912,1545.349190231469,1546.0648342604682,1544.9806158769777,1545.687052135761,1546.7003097726313,1549.5746147519592,1515.9959277767377 14 | 0,1546.1472934504138,1549.0310226553904,1546.2182527550501,1545.986408262622,1546.098830071724,1545.2367763938248,1546.228147137712,1550.8313222761083,1514.9797635003847 15 | 
0,1523.5328544697911,1525.59407725351,1522.9802704060385,1522.4316084804866,1522.95969969595,1523.1103081838223,1522.600209178964,1528.1970547428268,1496.0700835756588 16 | 0,1532.6864535221584,1538.7854952069338,1532.310028865692,1530.8806485866457,1530.3205495600387,1531.5612062475302,1534.064356407713,1534.726217535128,1502.5856998430077 17 | 0,1546.1878672072844,1551.7414540982238,1546.2000136388215,1546.973271747036,1546.21919176862,1546.5671894981112,1547.45265505831,1549.719872582175,1512.7611197056162 18 | 0,1537.726152940382,1543.5524452581183,1537.4406763613952,1537.016605674105,1536.8850413325752,1536.3751282831654,1538.2270387305407,1540.0577893375146,1508.3910342302893 19 | 0,1545.5597116785325,1545.6789639330232,1545.0481961856735,1544.6873732869797,1544.2718576659563,1543.0321359707125,1544.3423324667508,1547.2537569649323,1513.3788893410956 20 | 0,1550.7343664544767,1552.728257387529,1549.6684526657875,1550.1502106532403,1548.7550219712964,1548.7369317013045,1550.8293741689765,1553.621055770916,1519.4701684939646 21 | 0,1543.5411665239928,1544.514184434482,1543.060767639979,1544.9499574482145,1542.4388046262811,1542.0976595325355,1543.4394210195635,1546.8778562555162,1514.8690150417767 22 | 0,1542.2771894822133,1544.3993523925542,1542.3209320062167,1541.4211521975335,1541.8563184467293,1541.4044388274315,1541.6784305888718,1548.9392460129873,1513.2550547577694 23 | 0,1540.9932278092797,1542.6650410470256,1540.487992043264,1539.5060138557203,1538.9077523715819,1540.444687883427,1542.1416024580317,1545.4567997384406,1510.7226969078367 24 | 0,1550.1372816482103,1549.5571572437382,1547.9216869560332,1547.8902457453873,1547.3985004906972,1547.8483256373877,1547.8130988233686,1552.4468503847706,1517.1607583544787 25 | 0,1547.3476425399951,1551.627307507929,1546.2627093648634,1547.0835846834345,1545.958783019677,1546.0628414446417,1546.699233179261,1549.0704785091823,1516.4876675187152 26 | 
0,1537.6860503669834,1543.082592453957,1537.593511572401,1537.4467756485712,1538.0067788407864,1538.5831851936755,1539.6475107021065,1542.7179909250833,1509.1819751865949 27 | 0,1547.7194190204307,1548.611855139679,1546.7264314039232,1545.9795234117437,1548.0559924684542,1546.567634086374,1546.352751565825,1552.793170410151,1518.1260181421083 28 | 0,1547.3603037140072,1549.800480615346,1548.1117348040354,1546.5779206292495,1546.670028020793,1547.0078600893146,1547.6341212954096,1552.418556555753,1515.4348978030666 29 | 0,1543.6566942886966,1546.1127303124783,1540.9134610116116,1541.6340381203568,1541.1790537354705,1540.7327652706408,1543.3242261238613,1544.969515468969,1513.8041411097893 30 | 0,1550.695104522105,1557.5881576126417,1548.9483077927648,1551.1095831036996,1548.1420208997088,1549.6251470204224,1551.589182187194,1550.04618006521,1517.4276917591935 31 | 0,1543.2121208019707,1547.0388563902793,1542.5795097813098,1542.822588678884,1542.9895529456114,1543.913486569322,1544.5405534321167,1547.1261687193025,1512.0004046695883 32 | 0,1548.9871446314644,1552.6065732574236,1549.6031854168098,1549.235836253504,1549.3779979961914,1548.5321081534291,1550.3617148544936,1554.976585184154,1520.8812881057795 33 | 0,1544.5752967976107,1548.6704899241815,1544.2222846377226,1545.5602135708543,1544.9303619141353,1544.9468503431924,1544.8107539734017,1549.3890711791755,1514.4876716484318 34 | 0,1540.3558161971835,1544.614725180661,1540.808897628204,1540.9636971632306,1540.2045140528498,1540.6805688892778,1540.659512056507,1544.7445446604409,1510.2053736374558 35 | 0,1541.668662995301,1542.6077747285178,1538.8087721406198,1539.3997259843086,1539.4444587705375,1537.9519055651292,1538.6013976980848,1543.6904879431688,1510.969362815706 36 | 0,1540.8944526149937,1542.4622104665239,1539.633326372302,1540.0031069985735,1539.8618057983197,1538.6972117036944,1539.5092917433858,1545.5100832407627,1510.7030146118254 37 | 
0,1548.8242264274388,1550.919190483288,1547.5689032364912,1546.6159568657326,1548.049195194971,1547.1687775095472,1548.742579130855,1553.8929890843185,1519.9596743985637 38 | 0,1549.8781979668163,1552.5134264202911,1546.5876803925767,1546.0682369211243,1546.0321288721784,1545.9285814411055,1549.2768526191808,1550.0961490986394,1516.23350928139 39 | 0,1554.6801194738398,1558.69985537392,1552.965747101604,1553.5599827493204,1552.622091719444,1552.8653158551997,1553.672714135806,1556.4283996728034,1523.550691618777 40 | 0,1538.722094306145,1541.6852172208555,1538.294220993828,1538.5874160423352,1538.4047943358573,1538.0271645600528,1539.0252741407517,1543.0178930723077,1510.030557844772 41 | 0,1548.9980235214184,1550.967366317888,1546.8620798408801,1546.623870209238,1548.1018090641855,1546.6982725798475,1548.042870123411,1552.7668585793504,1516.8551406817496 42 | 0,1537.5067170742846,1543.5374500894295,1538.7698164664312,1539.0102850707692,1537.1962281556996,1537.872160863176,1539.2419476826442,1540.6872304739406,1505.6377774527673 43 | 0,1556.5749191152818,1558.4251367792608,1556.138988832668,1555.747431373126,1556.036056149733,1555.8141851128948,1555.7312599216439,1562.5224621785605,1527.5101148918102 44 | 0,1545.0636080961065,1549.6670388770526,1546.2807297059708,1545.486892804487,1545.3537944887782,1545.0754209700485,1546.3529882673076,1550.6062404919442,1516.7667913186683 45 | 0,1543.3307492524928,1547.951188918882,1542.2128876830252,1542.459476183885,1541.6794524119618,1541.6862953436112,1543.7764555368033,1546.9324118767076,1514.2052657057984 46 | 0,1541.7238304861494,1545.1363371767936,1540.831717047847,1542.1683547565458,1541.8388698777023,1540.8571684774097,1542.1544894338897,1547.457712713469,1513.0429879059557 47 | 0,1550.7479890943039,1552.106262022899,1548.7959645673513,1548.253137253652,1548.2402268490912,1547.8236548665839,1549.5982177674475,1552.14424417733,1518.086409422658 48 | 
0,1547.1373885072983,1549.65876086448,1546.2787626758407,1546.9141184414061,1546.0738975295726,1547.2046515560867,1547.5100187927044,1552.1686173776636,1518.459338593844 49 | 0,1542.053526709559,1547.9033491832183,1542.811422724414,1542.6395646379415,1541.507241196401,1541.5317460877764,1544.473560454916,1546.5973337443281,1511.992317597329 50 | 0,1539.261635715513,1541.3629687139105,1538.178876199819,1539.5837823210409,1537.6662544047374,1537.5104879666617,1540.305468693606,1541.4168009116752,1509.2116690067496 51 | 0,1547.8154096201886,1549.594113150983,1546.8695937120992,1547.3236555916058,1546.298527275891,1546.4929540186956,1547.8153076228598,1551.4872142786905,1518.0799138729164 52 | -------------------------------------------------------------------------------- /uber/downtown_risks_forest_years_twoyear.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CausalML/StochOptForest/e100b6a814d0a2305dc4f35fcda78d5111d27576/uber/downtown_risks_forest_years_twoyear.pkl -------------------------------------------------------------------------------- /uber/experiment_downtown_years.py: -------------------------------------------------------------------------------- 1 | from tree import * 2 | from cvar_tree_utilities import * 3 | import mkl 4 | mkl.set_num_threads(1) 5 | 6 | year_list = ["twoyear", "oneyear", "onehalfyear", "halfyear"] 7 | X_list = {y: pd.read_csv("../X_" + str(y) + ".csv") for y in year_list} 8 | Y_list = {y: pd.read_csv("../Y_" + str(y) + ".csv") for y in year_list} 9 | A_mat = pd.read_csv("../A_downtwon_1221to1256.csv").to_numpy() 10 | b_vec = pd.read_csv("../b_downtwon_1221to1256.csv").to_numpy() 11 | 12 | 13 | seed = 0 14 | np.random.seed(seed) 15 | 16 | 17 | runs = 50 18 | n_jobs = 50 19 | 20 | alpha = 0.8 21 | min_leaf_size = 10 22 | max_depth = 100 23 | n_proposals = 365 24 | mtry = 65 25 | honesty = False 26 | balancedness_tol = 0.2 27 | lb = 0; ub = 1 28 | verbose = False 29 
| bootstrap = True; 30 | n_trees = 100 31 | subsample_ratio = 1 32 | 33 | direct = '' 34 | date = '' 35 | output = direct + date + "experiment_downtown_years.txt" 36 | with open(output, 'w') as f: 37 | print("start", file = f) 38 | 39 | models_forest = {} 40 | times_forest = {} 41 | decisions_forest = {} 42 | risks_forest = {} 43 | feature_split_all = {} 44 | feature_importance_all = {} 45 | 46 | for year in year_list: 47 | with open(output, 'a') as f: 48 | print("year: ", year, file = f) 49 | 50 | X = X_list[year] 51 | Y = Y_list[year] 52 | 53 | enc = OneHotEncoder() 54 | enc.fit(X[["Period"]]) 55 | tf = enc.transform(X[["Period"]]).toarray() 56 | X[["AM", "EarlyMorning", "Evening", "Midday", "PM"]] = tf 57 | X[["AM", "EarlyMorning", "Evening", "Midday", "PM"]] = X[["AM", "EarlyMorning", "Evening", "Midday", "PM"]].astype(int) 58 | 59 | sss = StratifiedShuffleSplit(n_splits = runs, test_size = 0.5, random_state = seed) 60 | sss.get_n_splits(range(X["Period"].shape[0]), X[["Period", "weekday"]]) 61 | split_index = sss.split(range(X["Period"].shape[0]), X[["Period", "weekday"]]) 62 | 63 | X.drop(["Period"], inplace = True, axis = 1) 64 | X_train_list = []; X_test_list = []; 65 | Y_train_list = []; Y_test_list = [] 66 | for train_index, test_index in split_index: 67 | X_train_list.append(X.loc[train_index, ].to_numpy()) 68 | X_test_list.append(X.loc[test_index, ].to_numpy()) 69 | Y_train_list.append(Y.loc[train_index, ].to_numpy()) 70 | Y_test_list.append(Y.loc[test_index, ].to_numpy()) 71 | p = X_train_list[0].shape[1] 72 | 73 | time0 = time.time() 74 | results_fit = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(experiment_downtown_years)(Y_train_list[run], X_train_list[run], 75 | Y_test_list[run], X_test_list[run], 76 | A_mat = A_mat, b_vec = b_vec, alpha = alpha, ub = ub, lb = lb, 77 | subsample_ratio = subsample_ratio, bootstrap = bootstrap, n_trees = n_trees, honesty = honesty, mtry = mtry, 78 | min_leaf_size = min_leaf_size, max_depth = max_depth, n_proposals = 
n_proposals, 79 | balancedness_tol = balancedness_tol, verbose = verbose, seed = seed) for run in range(n_jobs)) 80 | models_forest[year] = [res[0] for res in results_fit] 81 | times_temp = [res[1] for res in results_fit] 82 | times_forest[year] = pd.concat([pd.DataFrame(tt, index = [0]) for tt in times_temp]) 83 | with open(output, 'a') as f: 84 | print("training time: ", time.time()-time0, file = f) 85 | print("time: ", times_forest[year].mean(), file = f) 86 | print("------------------------", file = f) 87 | 88 | time0 = time.time() 89 | evaluations = Parallel(n_jobs=n_jobs, verbose = 3)(delayed(evaluate_one_run)(models_forest[year][run], X_train_list[run], Y_train_list[run], 90 | X_train_list[run], Y_train_list[run], 91 | X_test_list[run], Y_test_list[run], 92 | A_mat = A_mat, b_vec = b_vec, alpha = alpha) for run in range(n_jobs)) 93 | decisions_forest[year] = [eval[0] for eval in evaluations] 94 | risks_temp = [eval[1] for eval in evaluations] 95 | risks_forest[year] = pd.concat([pd.DataFrame(rr, index = [0]) for rr in risks_temp]) 96 | with open(output, 'a') as f: 97 | print("evaluation time:", time.time()-time0, file = f) 98 | print("risk: ", risks_forest[year].mean(), file = f) 99 | print("------------------------", file = f) 100 | 101 | pickle.dump(risks_forest[year], open(direct + date + "downtown_risks_forest_years_"+year+".pkl", "wb")) 102 | 103 | feature_split_freq = evaluate_feature_split_freq(models_forest[year], p) 104 | feature_split_all[year] = feature_split_freq 105 | 106 | feature_importance = evaluate_feature_importance(models_forest[year], p) 107 | feature_importance_all[year] = feature_importance 108 | 109 | pickle.dump(feature_split_all[year], open(direct + date + "feature_split_years_"+year+".pkl", "wb")) 110 | pickle.dump(feature_importance_all[year], open(direct + date + "feature_imp_years_"+year+".pkl", "wb")) 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- 
/uber/preprocessing.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(lubridate) 3 | library(riem) 4 | 5 | all_data_list = vector("list", 8) 6 | for (i in 1:8){ 7 | all_data_list[[i]] = vector("list", length(file_names[[i]])) 8 | for (j in 1:length(file_names[[i]])){ 9 | all_data_list[[i]][[j]] = read_csv(paste0("data/", file_names[[i]][[j]])) %>% 10 | mutate(Date = mdy(Date)) %>% arrange(Date) 11 | } 12 | } 13 | all_data_list_small = lapply(all_data_list, function(x) do.call(rbind, x)) 14 | all_data = do.call(rbind, all_data_list_small) %>% arrange(Date) 15 | # check if all data are indeed downloaded 16 | for (i in 1:length(all_data_list)){ 17 | for (j in 1:length(all_data_list[[i]])){ 18 | temp = all_data_list[[i]][[j]] 19 | 20 | logic1 = as.Date(temp$Date[1]) == as.Date(sd_dates[i]) 21 | logic2 = as.Date(temp$Date[nrow(temp)]) == as.Date(ed_dates[i]) 22 | logic3 = temp$`Origin Movement ID`[1] == edges[j, 1] 23 | logic4 = temp$`Destination Movement ID`[1] == edges[j, 2] 24 | 25 | if (!(logic1 & logic2 & logic3 & logic4)) cat("i", i, "j", j, "\n") 26 | } 27 | } 28 | 29 | ###### missing data 30 | thin_data = all_data %>% rename(origin = `Origin Movement ID`, 31 | destination = `Destination Movement ID`, 32 | Daily = `Daily Mean Travel Time (Seconds)`, 33 | AM = `AM Mean Travel Time (Seconds)`, 34 | PM = `PM Mean Travel Time (Seconds)`, 35 | Midday = `Midday Mean Travel Time (Seconds)`, 36 | Evening = `Evening Mean Travel Time (Seconds)`, 37 | EarlyMorning = `Early Morning Mean Travel Time (Seconds)`) %>% 38 | select(Date, origin, destination, AM, PM, Midday, Evening, EarlyMorning) 39 | avg_time = thin_data %>% pivot_longer(cols = c("AM", "PM", "Midday", "Evening", "EarlyMorning"), 40 | names_to = "Period", values_to = "Time") %>% 41 | group_by(origin, destination, Period) %>% summarise(mean_time = mean(Time, na.rm = TRUE)) 42 | avg_time_list = vector("list", nrow(edges)) 43 | for (i in 
1:nrow(edges)){ 44 | avg_time_list[[i]] = avg_time %>% filter(origin == edges[i, 1], destination == edges[i, 2]) 45 | } 46 | avg_time = do.call(bind_rows, avg_time_list) 47 | # using average traveling time to impute missing observations 48 | for (i in 1:nrow(avg_time)){ 49 | temp = avg_time[i, ] 50 | missing_entries = thin_data %>% filter(origin == temp$origin, 51 | destination == temp$destination) %>% 52 | select(temp$Period) %>% is.na() 53 | thin_data[(thin_data$origin == temp$origin) & (thin_data$destination == temp$destination), temp$Period][missing_entries, ] = 54 | temp$mean_time 55 | } 56 | sum(is.na(thin_data)) 57 | thin_data = thin_data %>% pivot_longer(cols = c("AM", "PM", "Midday", "Evening", "EarlyMorning"), 58 | names_to = "Period", values_to = "Time") 59 | 60 | #### weather features 61 | # https://cran.r-project.org/web/packages/riem/vignettes/riem_package.html 62 | networks = riem_networks() 63 | networks[grep("California", networks$name), ] 64 | stations = riem_stations("CA_ASOS") 65 | stations[grep("LAX", stations$id), ] 66 | # measures <- riem_measures(station = "LAX", date_start = '2017-01-01', date_end = '2020-01-01') 67 | # write_csv(measures, "weather_measures.csv") 68 | measures = read_csv("weather_measures.csv") 69 | AM = (8 <= hour(measures$valid)) & (hour(measures$valid) <= 10) 70 | Midday = (11 <= hour(measures$valid)) & (hour(measures$valid) <= 16) 71 | PM = (17 <= hour(measures$valid)) & (hour(measures$valid) <= 19) 72 | Evening = ((20 <= hour(measures$valid)) & (hour(measures$valid) <= 23)) | (hour(measures$valid) == 0) 73 | EarlyMorning = (1 <= hour(measures$valid)) & (hour(measures$valid) <= 7) 74 | measures$Period = NA 75 | measures[AM, "Period"] = "AM" 76 | measures[Midday, "Period"] = "Midday" 77 | measures[PM, "Period"] = "PM" 78 | measures[Evening, "Period"] = "Evening" 79 | measures[EarlyMorning, "Period"] = "EarlyMorning" 80 | measures$Date = as.Date(measures$valid) 81 | measures_summary = measures %>% select(Date, Period, 
tmpf, sknt, p01i, vsby) %>% 82 | group_by(Date, Period) %>% 83 | summarise(Temp = mean(tmpf, na.rm = T), 84 | WindSpeed = mean(sknt, na.rm = T), 85 | Rain = mean(p01i, na.rm = T), 86 | Visibility = mean(vsby, na.rm = T)) 87 | 88 | ### calendar features 89 | measures_summary$Date = ymd(measures_summary$Date) 90 | measures_summary$weekday = wday(measures_summary$Date) 91 | measures_summary$month = month(measures_summary$Date) 92 | common_X = measures_summary 93 | 94 | ### lagging traveling times features 95 | create_lags <- function(avg_time, thin_data, lag_seq = c(1, 7)){ 96 | lag_list = vector("list", nrow(avg_time)) 97 | for (i in 1:nrow(avg_time)){ 98 | index = avg_time[i, ] 99 | temp = thin_data %>% filter(origin == index$origin, 100 | destination == index$destination, Period == index$Period) 101 | 102 | stopifnot(sum(temp$Date != c(temp %>% arrange(Date) %>% select(Date))[[1]])==0) 103 | lags = lapply(lag_seq, function(x) lag(temp$Time, n = x)) 104 | lags = do.call(cbind, lags) 105 | colnames(lags) = paste0("lag", lag_seq) 106 | lags = as_tibble(lags) 107 | lag_list[[i]] = bind_cols(temp, lags) 108 | } 109 | lag_list 110 | } 111 | thin_data_list = create_lags(avg_time, thin_data, lag_seq = c(1, 7)) 112 | thin_data_list_agg = vector("list", nrow(edges)) 113 | for (i in 1:nrow(edges)){ 114 | index = ((i-1)*5 + 1):(i*5) 115 | thin_data_list_agg[[i]] = do.call(rbind, thin_data_list[index]) %>% arrange(Date, Period) 116 | } 117 | for (i in 1:nrow(edges)){ 118 | thin_data_list_agg[[i]] = thin_data_list_agg[[i]][complete.cases(thin_data_list_agg[[i]]), ] 119 | } 120 | outcomes_list = vector("list", nrow(edges)) 121 | for (i in 1:nrow(edges)){ 122 | origin = thin_data_list_agg[[i]]$origin[1] 123 | destination = thin_data_list_agg[[i]]$destination[1] 124 | stopifnot({ 125 | origin == edges[i, 1] 126 | destination == edges[i, 2] 127 | }) 128 | temp = thin_data_list_agg[[i]] %>% select(Time, starts_with("lag")) 129 | colnames(temp) = 130 | c(paste0("Y", "_origin", origin, 
"_destination", destination), 131 | paste0("lag", c(1, 7), "_origin", origin, "_destination", destination)) 132 | outcomes_list[[i]] = temp 133 | } 134 | identifiers = thin_data_list_agg[[1]] %>% select(Date, origin, destination, Period) 135 | outcomes = do.call(bind_cols, outcomes_list) 136 | X_specific = outcomes %>% select(starts_with("lag")) 137 | X_common = identifiers %>% select(Date, Period) %>% left_join(common_X, by = c("Date", "Period")) 138 | X = bind_cols(X_common, X_specific) 139 | Y = outcomes %>% select(starts_with("Y")) 140 | 141 | ### create datasets 142 | X %>% filter((year(X$Date) == 2019)&(month(X$Date) >= 7)) %>% 143 | select(c("Period", "Temp", "WindSpeed", 144 | "Rain", "Visibility", "weekday", "month"), 145 | starts_with("lag1"), starts_with("lag7")) %>% write_csv("X_halfyear.csv") 146 | Y %>% filter((year(X$Date) == 2019)&(month(X$Date) >= 7)) %>% 147 | write_csv("Y_halfyear.csv") 148 | 149 | X %>% filter(year(X$Date) == 2019) %>% 150 | select(c("Period", "Temp", "WindSpeed", 151 | "Rain", "Visibility", "weekday", "month"), 152 | starts_with("lag1"), starts_with("lag7")) %>% write_csv("X_oneyear.csv") 153 | Y %>% filter(year(X$Date) == 2019) %>% write_csv("Y_oneyear.csv") 154 | 155 | X %>% filter((year(X$Date) == 2019) | ( (year(X$Date) == 2018) & (month(X$Date) >= 7) )) %>% 156 | select(c("Period", "Temp", "WindSpeed", 157 | "Rain", "Visibility", "weekday", "month"), 158 | starts_with("lag1"), starts_with("lag7")) %>% write_csv("X_onehalfyear.csv") 159 | Y %>% filter((year(X$Date) == 2019) | ( (year(X$Date) == 2018) & (month(X$Date) >= 7) )) %>% 160 | write_csv("Y_onehalfyear.csv") 161 | 162 | X %>% filter((year(X$Date) == 2019) | (year(X$Date) == 2018)) %>% 163 | select(c("Period", "Temp", "WindSpeed", 164 | "Rain", "Visibility", "weekday", "month"), 165 | starts_with("lag1"), starts_with("lag7")) %>% write_csv("X_twoyear.csv") 166 | Y %>% filter((year(X$Date) == 2019) | (year(X$Date) == 2018)) %>% write_csv("Y_twoyear.csv") 167 | 168 | 
--------------------------------------------------------------------------------