├── .gitignore
├── Figure 1 Table 2 Figure 4 Labels.ipynb
├── Figure 6-7-8 Scenarios.ipynb
├── Figure 9 Historical performance.ipynb
├── MLvsGARCHecon
│   ├── MLvsGARCH_econ02.R
│   ├── MLvsGARCH_evt.R
│   ├── MLvsGARCH_evt_02.R
│   ├── MLvsGARCHecon.R
│   ├── MLvsGARCHecon_analysis.R
│   ├── Metainfo.txt
│   ├── README.md
│   ├── READMEEVT.md
│   ├── btc_log_returns.png
│   ├── btc_qqplot.png
│   ├── btc_qqplot_estgarch.png
│   ├── config.json
│   ├── core.R
│   ├── definition.R
│   ├── res_squared_p_acf.png
│   └── tools.py
├── MLvsGARCHml
│   └── utils.py
├── Metainfo.txt
├── README.md
├── README2.md
├── Table 3-4-5-6.ipynb
├── Table 7 Backtest performance.ipynb
├── constant.py
├── data
│   └── btc_1H_20160101_20200217.csv
├── hist_performance.p
├── min_tpr_exceedance_alpha.png
├── requirements.txt
├── result
│   ├── compare.py
│   ├── cvdates.p
│   ├── final_table.p
│   └── utils.py
├── strat_excess_perf_downtrend.png
├── strat_excess_perf_uptrend.png
├── strat_historical_perf.png
├── strat_switch_ens_varspread.png
└── tvar_0.01_w.png

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | .idea
 6 | MLvsGARCHml/saved_models/*
 7 | MLvsGARCHml/.ipynb_checkpoints/*
 8 | MLvsGARCHml/__pycache__/*
 9 | /__pycache__/*
10 | MLvsGARCHcomp/.ipynb_checkpoints/*
11 | MLvsGARCHcomp/__pycache__/*
12 | 
13 | MLvsGARCHecon/saved_models/*
14 | MLvsGARCHecon/.Rproj.user/*
15 | MLvsGARCHecon/.RData
16 | MLvsGARCHecon/.Rhistory
17 | venv/*
18 | 
19 | 
--------------------------------------------------------------------------------
/MLvsGARCHecon/MLvsGARCH_econ02.R:
--------------------------------------------------------------------------------
 1 | # VaR for a normal distribution
 2 | var.normal = function(probs, mean, sd)
 3 | {
 4 |   var = mean + sd * qnorm(p = probs)
 5 |   return(var)
 6 | }
 7 | 
 8 | # VaR for a Generalized Pareto Distribution (GPD)
 9 | var.gpd = function(probs, threshold, scale, shape, n, Nu)
10 | {
11 |   var = threshold + (scale / shape) * (((n / Nu) * (1 - probs)) ^ (-shape) - 1)
12 |   return(var)
13 | }
14 | 
15 | fit_predict = function(data.series,
16 |                        next.return.std,
17 |                        return.sd,
18 |                        model="garch",
19 |                        distribution="QMLE",
20 |                        arma_order=c(0,0),
21 |                        garch_order=c(1,0),
22 |                        q_fit = 0.1,
23 |                        qs = c(0.1)) {
24 | 
25 |   #######################################################################
26 |   ### Check that model and distribution are allowed
27 |   valid.models = c("garch", "gjr", "fam", "cs")
28 |   valid.distributions = c("norm", "std", "QMLE")
29 |   if (!(model %in% valid.models))
30 |   {
31 |     stop("Invalid model chosen")
32 |   }
33 |   if (!(distribution %in% valid.distributions))
34 |   {
35 |     stop("Invalid conditional distribution chosen")
36 |   }
37 |   #######################################################################
38 | 
39 | 
40 | 
41 |   #######################################################################
42 |   # Fit models and extract needed parameters (and residuals)
43 | 
44 |   ##########################################################
45 |   ####### Use package fGarch for plain garch model
46 | 
47 | 
48 |   formula = substitute(~arma(p,q) + garch(a,b),
49 |                        list(p=arma_order[1],
50 |                             q=arma_order[2],
51 |                             a=garch_order[1],
52 |                             b=garch_order[2]))
53 | 
54 |   if ((model == "garch"))
55 |   {
56 |     if (distribution == "norm") # Normal innovations
57 |     {
58 |       fitted.model = garchFit(formula = formula,
59 |                               data = data.series,
60 |                               cond.dist = "norm",
61 |                               trace = FALSE)
62 |     }
63 | 
64 |     if (distribution == "std") # Student t innovations
65 |     {
66 |       fitted.model = 
garchFit(formula = formula, 67 | data = data.series, 68 | cond.dist = "std", 69 | shape = df, 70 | include.shape = FALSE, 71 | trace = FALSE) 72 | } 73 | 74 | if (distribution == "QMLE") # QMLE estimation 75 | { 76 | fitted.model = garchFit(formula = formula, 77 | data = data.series, 78 | cond.dist = "QMLE", 79 | trace = FALSE) 80 | } 81 | 82 | # Produce forecasts of mean and standard deviation 83 | model.forecast = fGarch::predict(object = fitted.model, n.ahead = 1) 84 | 85 | 86 | model.mean = model.forecast$meanForecast 87 | model.sd = model.forecast$standardDeviation 88 | 89 | # Get residuals (for EVT): standardize through (time dependent) fitted values 90 | # and standard deviations 91 | # model.residuals = fGarch::residuals(fitted.model, standardize=TRUE) 92 | est_sigma = fitted.model@sigma.t 93 | stdres = data.series / est_sigma 94 | } else { 95 | 96 | ########################################################## 97 | ####### Use package rugarch for other models 98 | 99 | # AR(1) - GJR-GARCH(1,1) model 100 | if (model == "gjr") 101 | { 102 | if (distribution == "norm") # Normal innovations 103 | { 104 | fitted.model = ugarchfit(spec=gjr.spec.norm, 105 | data=data.series, 106 | solver=slvr, 107 | solver.control=slvr.ctrl) 108 | } 109 | if (distribution == "std") # Student t innovations 110 | { 111 | fitted.model = ugarchfit(spec=gjr.spec.std, 112 | data=data.series, 113 | solver=slvr, 114 | solver.control=slvr.ctrl) 115 | } 116 | 117 | } 118 | # AR(1) - component-GARCH(1,1) model 119 | if (model == "cs") 120 | { 121 | if (distribution == "norm") # Normal innovations 122 | { 123 | fitted.model = ugarchfit(spec=cs.spec.norm, 124 | data=data.series, 125 | solver=slvr, 126 | solver.control=slvr.ctrl) 127 | } 128 | if (distribution == "std") # Student t innovations 129 | { 130 | fitted.model = ugarchfit(spec=cs.spec.std, 131 | data=data.series, 132 | solver=slvr, 133 | solver.control=slvr.ctrl) 134 | } 135 | } 136 | 137 | # Make forecasts of tomorrow's expected value and standard deviation 138 | # for models from rugarch package 139 | model.forecast = ugarchforecast(fitted.model, n.ahead=1) 140 | model.mean = model.forecast@forecast$forecasts[[1]]$series 141 | model.sd = model.forecast@forecast$forecasts[[1]]$sigma 142 | 143 | # Get residuals (for EVT): standardize through (time dependent) fitted values 144 | # and standard deviations 145 | # model.residuals = rugarch::residuals(fitted.model, standardize=TRUE) 146 | est_sigma = fitted.model@sigma.t 147 | stdres = data.series / est_sigma 148 | } 149 | 150 | ####################################################################### 151 | # Now calculate VaR, ES, VaR-break, ES difference, and excess residuals 152 | 153 | ############################################################ 154 | # Peak-Over-Threshold estimates 155 | prediction_i = c(date, next.return.std[1], return.sd[1], model.mean, model.sd)#, model.var, model.break) 156 | 157 | 158 | for (j in 1:length(qs)) { 159 | q = qs[j] 160 | k = length(stdres) * (1 - q_fit) # Determine threshold: fit GPD on q_fit of the data 161 | EVTmodel.threshold = (sort(stdres, decreasing = TRUE))[(k + 1)] 162 | # Fit GPD to residuals 163 | EVTmodel.fit = gpd.fit( 164 | xdat = stdres, 165 | threshold = EVTmodel.threshold, 166 | npy = NULL, 167 | show = FALSE 168 | ) 169 | 170 | ############################################################ 171 | # Base model estimates 172 | 173 | if (distribution == "norm" || distribution == "QMLE") 174 | { 175 | model.var = var.normal(q, mean=model.mean, sd=model.sd) 176 | 177 | 
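# (Added note, for illustration only: var.normal() defined at the top of this file is the
#  location-scale normal quantile, VaR_q = mu + sd * qnorm(q); for example,
#  var.normal(0.99, mean = 0, sd = 1) = qnorm(0.99) ~ 2.326.)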
#model.es = es.normal(mean=model.mean, sd=model.sd, probs=qs) 178 | 179 | } 180 | 181 | if (distribution == "std") 182 | { 183 | model.var = var.student(q, mean=model.mean, sd=model.sd, df=df) 184 | #model.es = es.student(mean=model.mean, sd=model.sd, probs=qs, df=df) 185 | } 186 | 187 | # VaR-break 188 | model.break = (next.return.std > model.var) 189 | 190 | # Difference between actual loss and ES estimate 191 | #model.diff = (next.return.std - model.es) 192 | # Excess residuals (page 294, McNeil-Frey) 193 | #model.exres = model.diff / model.sd 194 | 195 | 196 | # EVT ESTIMATES 197 | 198 | # Extract scale and shape parameter estimates 199 | EVTmodel.scale = EVTmodel.fit$mle[1] 200 | EVTmodel.shape = EVTmodel.fit$mle[2] 201 | # Estimate quantiles 202 | Nu = EVTmodel.fit$nexc 203 | EVTmodel.zq = var.gpd(q, EVTmodel.threshold, EVTmodel.scale, EVTmodel.shape, length(data.series), Nu) 204 | # Calculate VaR 205 | EVTmodel.var = model.mean + model.sd * EVTmodel.zq 206 | # Calculate the Expected Shortfall 207 | # EVTmodel.es = model.mean + model.sd * es.gpd(var=EVTmodel.zq, threshold=EVTmodel.threshold, scale=EVTmodel.scale, shape=EVTmodel.shape) 208 | # VaR-break 209 | EVTmodel.break = (next.return.std > EVTmodel.var) 210 | 211 | # Difference between actual loss and ES estimate 212 | #EVTmodel.diff = (next.return.std - EVTmodel.es) 213 | 214 | # Exceedance residuals (page 294, McNeil-Frey) 215 | # Note that these are created for all observations; 216 | # only those on dates of VaR-breaks should be used 217 | # in the bootstrap test 218 | #EVTmodel.exres = EVTmodel.diff / model.sd 219 | 220 | q_data = c(model.var, model.break, EVTmodel.threshold, EVTmodel.var, EVTmodel.break, EVTmodel.zq) 221 | prediction_i = c(prediction_i, q_data) 222 | } 223 | 224 | return (prediction_i) 225 | } -------------------------------------------------------------------------------- /MLvsGARCHecon/MLvsGARCH_evt.R: -------------------------------------------------------------------------------- 1 | rm(list = ls(all = TRUE)) 2 | graphics.off() 3 | 4 | set.seed(10) 5 | 6 | library(POT) 7 | library(ismev) 8 | library(fGarch) 9 | library(MLmetrics) 10 | source("./definition.R") 11 | 12 | # Constants 13 | day = 24 14 | month = day*30 15 | 16 | TEST = FALSE 17 | 18 | fit_pred = function() { 19 | fitted.model = garchFit( 20 | formula = ~ arma(3, 1) + garch(1, 2), 21 | data = data.series, 22 | cond.dist = "QMLE", 23 | trace = FALSE 24 | ) 25 | # Predict next value 26 | model.forecast = fGarch::predict(object = fitted.model, n.ahead = 1) 27 | model.mean = model.forecast$meanForecast #serie 28 | model.sd = model.forecast$standardDeviation #volatility 29 | 30 | # get standardized residuals 31 | est_sigma = fitted.model@sigma.t 32 | stdres = data.series / est_sigma 33 | #Fit gpd to residuals over threshold 34 | # Determine threshold 35 | 36 | #prediction[count, 1] = date 37 | #prediction[count, 2] = next.return 38 | #prediction[count, 3] = return.sd 39 | 40 | prediction_i = c(date, next.return[1], return.sd[1]) 41 | 42 | for (j in 1:length(qs)) { 43 | q = qs[j] 44 | # EVTmodel.threshold = quantile(data.series, q) 45 | # k = sum(data.series >= EVTmodel.threshold) 46 | k = length(stdres) * (1 - q_fit) 47 | EVTmodel.threshold = (sort(stdres, decreasing = TRUE))[(k + 1)] 48 | # Fit GPD to residuals 49 | EVTmodel.fit = gpd.fit( 50 | xdat = stdres, 51 | threshold = EVTmodel.threshold, 52 | npy = NULL, 53 | show = FALSE 54 | ) 55 | # Extract scale and shape parameter estimates 56 | EVTmodel.scale = EVTmodel.fit$mle[1] 57 | 
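# (Added note, for illustration only: ismev::gpd.fit returns the estimates as
#  $mle = c(scale, shape) and the number of threshold exceedances as $nexc;
#  var.gpd() then evaluates the McNeil-Frey POT quantile
#  z_q = u + (scale/shape) * (((n/Nu) * (1 - q))^(-shape) - 1) at these estimates.)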
EVTmodel.shape = EVTmodel.fit$mle[2] 58 | # Estimate quantiles 59 | Nu = EVTmodel.fit$nexc 60 | 61 | #EVTmodel.zq = qgpd(q, loc = EVTmodel.threshold, scale = EVTmodel.scale, shape = EVTmodel.shape) 62 | EVTmodel.zq = var.gpd(q, EVTmodel.threshold, EVTmodel.scale, EVTmodel.shape, n, Nu) 63 | 64 | # Predict return value 65 | EVTmodel.var = model.mean + model.sd * EVTmodel.zq 66 | model.var = var.normal(probs = q, 67 | mean = model.mean, 68 | sd = model.sd) 69 | # Calculate the Expected Shortfall 70 | model.es = es.normal(probs=q, mean=model.mean, sd=model.sd) 71 | EVTmodel.es = model.mean + model.sd * es.gpd(var=EVTmodel.zq, 72 | threshold=EVTmodel.threshold, 73 | scale=EVTmodel.scale, 74 | shape=EVTmodel.shape) 75 | 76 | # predicted_value = model.sd * EVTmodel.zq 77 | #prediction[count, ((j-1) * 2 + 4)] = predicted_value_mean 78 | #prediction[count, ((j-1) * 2 + 5)] = predicted_norm 79 | 80 | q_data = c(EVTmodel.threshold, EVTmodel.var, EVTmodel.es, model.var, model.es, model.mean , model.sd, EVTmodel.zq) 81 | 82 | prediction_i = c(prediction_i, q_data) 83 | } 84 | 85 | return (prediction_i) 86 | } 87 | 88 | # Load variables and helper functions 89 | dset = "../data/btc_1H_20160101_20190101" 90 | 91 | 92 | dset.name = paste("./", dset, ".csv", sep = "") 93 | dataset <- read.csv(dset.name, header = TRUE) 94 | dates = dataset$X 95 | dataset = data.frame("close" = dataset$close) 96 | rownames(dataset) = dates 97 | dataset <- timeSeries::as.timeSeries(dataset, FinCenter = "GMT") 98 | 99 | 100 | # Fit model on one month history 101 | window_size = 4 * month 102 | q_fit = 0.1 # fit to 10% of worst outcomes 103 | 104 | # Convert price series to loss series 105 | dataset = na.omit(diff(log(dataset))) 106 | 107 | # Forget about 2016 108 | dataset = dataset[rownames(dataset) >= '2017-05-01 00:00:00', 1] 109 | 110 | 111 | length.dataset = length(dataset[, 1]) 112 | dates = rownames(dataset) 113 | 114 | 115 | 116 | # Split data 117 | test_size = (length(dataset) - window_size) 118 | qs = c(0.90, 0.95, 0.975, 0.99) 119 | prediction = matrix(nrow = test_size, ncol = (3 + (8 * length(qs)))) 120 | 121 | 122 | if (TEST){ 123 | test_size = 20 124 | } 125 | 126 | count = 1 127 | time <- Sys.time() 128 | save_path = gsub(' ', '', gsub('-', '', gsub(':', '', time))) 129 | for (i in (window_size + 1):(window_size + test_size)) { 130 | if (i %% 1000 == 0) { 131 | print((length(dataset) - i)) 132 | print(prediction[(i - window_size - 1), ]) 133 | write.csv(prediction, paste0(save_path, "prediction_online_first.csv"), row.names = FALSE) 134 | print(head(prediction)) 135 | } 136 | n = window_size 137 | 138 | data.series = dataset[(i - window_size):(i - 1), 1] 139 | 140 | # Normalize entire dataset to have variance one 141 | return.sd = apply(data.series, 2, sd) 142 | data.series = data.series$close / return.sd 143 | next.return = dataset[i] / return.sd 144 | date = dates[i] 145 | # Fit model and get prediction 146 | if (!TEST){ 147 | prediction_i = tryCatch( 148 | 149 | fit_pred(), 150 | 151 | error = function(e) 152 | base::rep(NA, ncol(prediction)), #function(e) base::print('ERROR'), 153 | 154 | silent = FALSE) 155 | } 156 | if (TEST){ 157 | prediction_i = fit_pred() 158 | } 159 | prediction[count,] = as.vector(prediction_i) 160 | count = count + 1 161 | } 162 | 163 | data_pred = prediction[!is.na(prediction[, 1]),] 164 | dates = data_pred[, 1] 165 | data_pred = data_pred[,-1] 166 | mode(data_pred) = "double" 167 | df = data.frame(data_pred) 168 | cn = c() 169 | for (q in c('10%', '5%', '2.5%', '1%')){ 170 | cn = 
c(cn, 171 | paste0("threshold_", q), 172 | paste0("evt_var_", q), 173 | paste0("evt_es_", q), 174 | paste0("var_", q), 175 | paste0("es_", q), 176 | paste0("mean_", q), 177 | paste0("sd_", q), 178 | paste0("zq_", q) 179 | ) 180 | } 181 | colnames(df) = c( 182 | "std_losses", 183 | "norm_sd", 184 | cn 185 | ) 186 | 187 | rownames(df) = dates 188 | 189 | 190 | ######### SAVE 191 | if (!TEST) { 192 | write.csv(df, paste0(save_path, "_prediction_10per.csv"), row.names = TRUE) 193 | } -------------------------------------------------------------------------------- /MLvsGARCHecon/MLvsGARCH_evt_02.R: -------------------------------------------------------------------------------- 1 | rm(list = ls(all = TRUE)) 2 | graphics.off() 3 | 4 | set.seed(10) 5 | 6 | library(POT) 7 | library(ismev) 8 | library(fGarch) 9 | library(MLmetrics) 10 | source("./definition.R") 11 | 12 | TEST = FALSE 13 | 14 | # Constants 15 | day = 24 16 | month = day*30 17 | 18 | 19 | fit_pred = function() { 20 | prediction_i = c(date, next.return[1], return.sd[1]) 21 | fitted.model = garchFit( 22 | formula = ~ arma(3, 1) + garch(1, 2), 23 | data = data.series, 24 | cond.dist = "QMLE", 25 | trace = FALSE 26 | ) 27 | # Get standardized residuals 28 | model.residuals = fGarch::residuals(fitted.model , standardize = TRUE) 29 | model.coef = coef(fitted.model) 30 | # Predict next value 31 | model.forecast = fGarch::predict(object = fitted.model, n.ahead = 1) 32 | model.mean = model.forecast$meanForecast # conditional mean 33 | model.sd = model.forecast$standardDeviation # conditional volatility 34 | 35 | # Fit gpd to residuals over threshold 36 | # Determine threshold 37 | 38 | EVTmodel.threshold = quantile(model.residuals, (1 - q_fit)) 39 | 40 | # Fit GPD to residuals 41 | EVTmodel.fit = gpd.fit( 42 | xdat = model.residuals, 43 | threshold = EVTmodel.threshold, 44 | show = FALSE 45 | ) 46 | # Extract scale and shape parameter estimates 47 | EVTmodel.scale = EVTmodel.fit$mle[1] 48 | EVTmodel.shape = EVTmodel.fit$mle[2] 49 | # Estimate quantiles 50 | Nu = EVTmodel.fit$nexc 51 | q = 0.90 52 | # Extract scale and shape parameter estimates 53 | EVTmodel.scale = EVTmodel.fit$mle[1] 54 | EVTmodel.shape = EVTmodel.fit$mle[2] 55 | # Estimate quantiles 56 | Nu = EVTmodel.fit$nexc 57 | EVTmodel.zq = var.gpd(q, EVTmodel.threshold, EVTmodel.scale, EVTmodel.shape, n, Nu) 58 | 59 | # Calculate the Value-At-Risk 60 | EVTmodel.var = model.mean + model.sd * EVTmodel.zq 61 | model.var = var.normal(probs = q, 62 | mean = model.mean, 63 | sd = model.sd) 64 | 65 | # Calculate the Expected Shortfall 66 | model.es = es.normal(probs=q, mean=model.mean, sd=model.sd) 67 | EVTmodel.es = model.mean + model.sd * es.gpd(var=EVTmodel.zq, 68 | threshold=EVTmodel.threshold, 69 | scale=EVTmodel.scale, 70 | shape=EVTmodel.shape) 71 | 72 | # Calculate proba 73 | model.proba = pnorm(( lower[1] - model.mean) / model.sd) 74 | model.proba = 1 - model.proba 75 | EVTmodel.proba = pgpd(( lower[1] - model.mean) / model.sd, 76 | loc = EVTmodel.threshold, 77 | scale = EVTmodel.scale, 78 | shape = EVTmodel.shape) 79 | EVTmodel.proba = 1 - EVTmodel.proba 80 | EVTmodel.proba.est = tail.gpd((lower[1] - model.mean) / model.sd, 81 | EVTmodel.threshold, 82 | EVTmodel.scale, 83 | EVTmodel.shape, 84 | n, 85 | Nu) 86 | q_data = c(EVTmodel.threshold, EVTmodel.var, EVTmodel.es, EVTmodel.proba, EVTmodel.proba.est, 87 | model.var, model.es, model.proba, model.mean , model.sd, EVTmodel.zq) 88 | 89 | prediction_i = c(prediction_i, q_data) 90 | 91 | 92 | return (prediction_i) 93 | } 94 | 95 | # 
Load variables and helper functions 96 | dset = "../data/new_btc_1H_lower_20160101_20190101" 97 | dset.name = paste("./", dset, ".csv", sep = "") 98 | dataset <- read.csv(dset.name, header = TRUE) 99 | dates = dataset$X 100 | dataset = data.frame("close" = dataset$close, "lower" = dataset$lower) 101 | rownames(dataset) = dates 102 | dataset <- timeSeries::as.timeSeries(dataset, FinCenter = "GMT") 103 | 104 | # Fit model on 4 months history 105 | window_size = 4 * month 106 | q_fit = 0.2 # fit to 10% of worst outcomes 107 | 108 | # Convert price series to losses and lower (negative) threshold to a positive threshold 109 | dataset = cbind(dataset, c(NaN, - diff(log(dataset$close)))) 110 | colnames(dataset) = c("close", "lower", "returns") 111 | dataset$lower = - dataset$lower 112 | 113 | # Forget about 2016 114 | dataset = dataset[rownames(dataset) >= '2017-01-01 00:00:00', c("returns", "lower")] 115 | # dataset = dataset[rownames(dataset) <= '2018-12-04 00:00:00', c("returns", "lower")] 116 | 117 | length.dataset = nrow(dataset) 118 | dates = rownames(dataset) 119 | 120 | # Split data 121 | test_size = length.dataset - window_size 122 | 123 | qs = c(0.90) 124 | prediction = matrix(nrow = test_size, ncol = (3 + (11 * length(qs)))) 125 | 126 | 127 | if (TEST){ 128 | test_size = 20 129 | } 130 | 131 | count = 1 132 | time <- Sys.time() 133 | save_path = gsub(' ', '', gsub('-', '', gsub(':', '', time))) 134 | for (i in (window_size + 1):(window_size + test_size)) { 135 | if ((test_size + window_size - i) %% 250 == 0){ 136 | print(paste0("Steps to go: ", test_size + window_size - i)) 137 | } 138 | if (i %% 1000 == 0) { 139 | print(paste("Saving prediction at step", i)) 140 | write.csv(prediction, paste0(save_path, "prediction_online_first.csv"), row.names = FALSE) 141 | } 142 | n = window_size 143 | 144 | data.series = dataset[(i - window_size):(i - 1), "returns"] 145 | # Normalize entire dataset to have variance one 146 | return.sd = apply(data.series, 2, sd) 147 | data.series = data.series$returns / return.sd 148 | next.return = dataset[i, "returns"] 149 | lower = dataset[i - 1, "lower"]/ return.sd 150 | date = dates[i] 151 | # Fit model and get prediction 152 | if (!TEST){ 153 | prediction_i = tryCatch( 154 | 155 | fit_pred(), 156 | 157 | error = function(e) 158 | base::rep(NA, ncol(prediction)), #function(e) base::print('ERROR'), 159 | 160 | silent = FALSE) 161 | } 162 | if (TEST){ 163 | prediction_i = fit_pred() 164 | } 165 | prediction[count,] = as.vector(prediction_i) 166 | count = count + 1 167 | } 168 | 169 | data_pred = prediction[!is.na(prediction[, 1]),] 170 | dates = data_pred[, 1] 171 | data_pred = data_pred[,-1] 172 | mode(data_pred) = "double" 173 | df = data.frame(data_pred) 174 | cn = c() 175 | for (q in qs){ 176 | cn = c(cn, 177 | paste0("threshold_", q), 178 | paste0("evt_var_", q), 179 | paste0("evt_es_", q), 180 | paste0("evt_proba_", q), 181 | paste0("evt_proba_est_", q), 182 | paste0("var_", q), 183 | paste0("es_", q), 184 | paste0("proba_", q), 185 | paste0("mean_", q), 186 | paste0("sd_", q), 187 | paste0("zq_", q) 188 | ) 189 | } 190 | colnames(df) = c( 191 | "std_losses", 192 | "norm_sd", 193 | cn 194 | ) 195 | 196 | rownames(df) = dates 197 | 198 | ######### SAVE 199 | if (TEST){ 200 | write.csv(df, paste0( 201 | paste0('./', 202 | save_path), 203 | "_prediction_10per_proba_TEST.csv" 204 | ), 205 | row.names = TRUE) 206 | } else { 207 | write.csv(df, paste0( 208 | paste0('./saved_models/', 209 | save_path), 210 | "_prediction_qfit_02.csv" 211 | ), 212 | row.names = 
TRUE) 213 | } 214 | -------------------------------------------------------------------------------- /MLvsGARCHecon/MLvsGARCHecon.R: -------------------------------------------------------------------------------- 1 | rm(list = ls(all = TRUE)) 2 | graphics.off() 3 | 4 | # install and load packages 5 | libraries = c( "rjson", "timeSeries") 6 | lapply(libraries, function(x) if (!(x %in% installed.packages())) { 7 | install.packages(x) 8 | }) 9 | lapply(libraries, library, quietly = TRUE, character.only = TRUE) 10 | 11 | source('./core.R') 12 | 13 | hour = 1 14 | day = hour*24 15 | week = day*7 16 | month = week*4 17 | 18 | 19 | ####### data 20 | ret = get_returns("../data/btc_1H_20160101_20190101.csv") 21 | 22 | ####### model parameters 23 | distribution = 'sstd' 24 | armaOrder = c(3, 1) 25 | n_train = 6*month #length(ret$close[(ret$dates <=cv[1])]) 26 | comments = paste0('FINAL') 27 | 28 | ####### cross validation folds 29 | 30 | dir_ = '24072019-111446-labelQuantile_cv_day_log_class_weighted' 31 | cv <- fromJSON(file = paste0(paste0("../MLvsGARCHml/saved_models/", dir_), "/global_dates.json")) 32 | nb_cv = length(cv) 33 | ####### Refit frequency of GARCH model 34 | every = nb_cv 35 | 36 | start_time_total <- Sys.time() 37 | results = cv_prediction(ret, cv, armaOrder, distribution, every, 38 | n_train = n_train, save = FALSE, comments = comments) 39 | end_time_total <- Sys.time() 40 | print('Total time:') 41 | print(end_time_total - start_time_total) 42 | 43 | -------------------------------------------------------------------------------- /MLvsGARCHecon/MLvsGARCHecon_analysis.R: -------------------------------------------------------------------------------- 1 | rm(list = ls(all = TRUE)) 2 | graphics.off() 3 | 4 | # install and load packages 5 | libraries = c("rugarch", "FinTS") 6 | lapply(libraries, function(x) if (!(x %in% installed.packages())) { 7 | install.packages(x) 8 | }) 9 | lapply(libraries, library, quietly = TRUE, character.only = TRUE) 10 | 11 | # load dataset 12 | path = "../data/btc_1H_train_0.csv" 13 | data = read.csv(file=path, header=TRUE, sep=",", dec=".") 14 | close = data[1:nrow(data),"close"] 15 | date1 = as.Date(c(levels(data[1:nrow(data),"X"]))) 16 | # take log returns 17 | ret = diff(log(close)) 18 | ret = na.omit(ret) 19 | # plot of btc return 20 | plot(date1[2:length(date1)], ret, type = 'l', xlab='time', ylab='btc log returns') 21 | 22 | par(mfrow = c(1, 2)) 23 | # histogram of returns 24 | hist(ret, col = "grey", breaks = 40, freq = FALSE, xlab = NA) 25 | lines(density(ret), lwd = 2) 26 | mu = mean(ret) 27 | sigma = sd(ret) 28 | x = seq(-4, 4, length = 100) 29 | curve(dnorm(x, mean = mu, sd = sigma), add = TRUE, col = "darkblue", 30 | lwd = 2) 31 | # qq-plot 32 | par(pty="s") 33 | d = (ret - mu)/ sigma 34 | plot(qnorm(seq(0,1, length.out=length(d))), 35 | d[order(d)], xlim = c(-15,15), ylim = c(-15,15), main = NULL, 36 | ylab = "Sample Quantiles", xlab = "Theoretical Quantiles") 37 | 38 | lines(qnorm(seq(0,1, length.out=length(d))), qnorm(seq(0,1, length.out=length(d)))) 39 | lines(seq(-15, 40 | qnorm(seq(0,1, length.out=length(d)))[2], 41 | length.out=length(d) 42 | ), 43 | seq(-15, 44 | qnorm(seq(0,1, length.out=length(d)))[2], 45 | length.out=length(d) 46 | )) 47 | 48 | lines(seq(qnorm(seq(0,1, length.out=length(d)))[length(d) - 1], 49 | 15, 50 | length.out=length(d) 51 | ), 52 | seq(qnorm(seq(0,1, length.out=length(d)))[length(d) - 1], 53 | 15, 54 | length.out=length(d) 55 | )) 56 | 57 | 58 | par(pty="s") 59 | d = (ret - mu)/ sigma 60 | plot(qnorm(seq(0,1, 
length.out=length(d))), 61 | d[order(d)], xlim = c(-15,15), ylim = c(-15,15), main = NULL, 62 | ylab = "Sample Quantiles", xlab = "Theoretical Quantiles") 63 | 64 | abline(coef = c(0,1)) 65 | 66 | #lines(qnorm(seq(0,1, length.out=length(d))), qnorm(seq(0,1, length.out=length(d)))) 67 | #lines(qnorm(seq(0,1, length.out=length(d))), qnorm(seq(0,1, length.out=length(d)))) 68 | 69 | 70 | 71 | 72 | 73 | qqnorm((ret - mu)/ sigma, xlim = c(-15,15), ylim = c(-15,15), main = NULL) 74 | qqline((ret - mu)/ sigma, probs = c(0.01, 0.99)) 75 | 76 | 77 | 78 | 79 | # Fit ARIMA model 80 | order = c(3, 0, 1) # arimaorder(fit) 81 | ARIMAfit <- arima(ret, order = order) 82 | summary(ARIMAfit) 83 | 84 | # vola cluster 85 | par(mfrow = c(1, 1)) 86 | res = ARIMAfit$residuals 87 | res2 = ARIMAfit$residuals^2 88 | plot(res, ylab = NA, type = 'l') 89 | plot(res2, ylab='Squared residuals', main=NA) 90 | 91 | par(mfrow = c(1, 2)) 92 | acfres2 = acf(res2, main = NA, lag.max = 20, ylab = "Sample Autocorrelation", 93 | lwd = 2) 94 | pacfres2 = pacf(res2, lag.max = 20, ylab = "Sample Partial Autocorrelation", 95 | lwd = 2, main = NA) 96 | 97 | # arch effect 98 | res = ARIMAfit$residuals 99 | ArchTest(res) #library FinTS 100 | Box.test(res2, type = "Ljung-Box") 101 | 102 | # We reject null hypothesis of both Archtest and Ljung-Box => autocorrelation in the squared residuals 103 | 104 | # EtGarch 105 | 106 | #fit the rugarch eGarch model with skew student distribution 107 | spec = ugarchspec(mean.model = list(armaOrder = c(3,1)), 108 | variance.model = list(model = 'eGARCH', 109 | garchOrder = c(1,2)), 110 | distribution = 'sstd') 111 | essgarch12 <- ugarchfit(spec, ret, solver = 'hybrid') 112 | 113 | # qq plot 114 | par(pty="s") 115 | plot(essgarch12, which = 9)#, xlim = c(-15,15)) 116 | 117 | 118 | # To control plot param need to call qdist and .qqLine 119 | zseries = as.numeric(residuals(essgarch12, standardize=TRUE)) 120 | distribution = essgarch12@model$modeldesc$distribution 121 | idx = essgarch12@model$pidx 122 | pars = essgarch12@fit$ipars[,1] 123 | skew = pars[idx["skew",1]] 124 | shape = pars[idx["shape",1]] 125 | if(distribution == "ghst") ghlambda = -shape/2 else ghlambda = pars[idx["ghlambda",1]] 126 | 127 | par(mfrow = c(1, 1), pty="s") 128 | n = length(zseries) 129 | x = qdist(distribution = distribution, lambda = ghlambda, 130 | skew = skew, shape = shape, p = ppoints(n))[order(order(zseries))] 131 | plot(x, zseries, ylim = c(-15,15), ylab="Sample Quantiles", xlab="Theoretical Quantiles") 132 | rugarch:::.qqLine(y = zseries, dist = distribution, datax = TRUE, lambda = ghlambda, 133 | skew = skew, shape = shape) 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /MLvsGARCHecon/Metainfo.txt: -------------------------------------------------------------------------------- 1 | Name of Quantlet: 'MLvsGARCHecon' 2 | 3 | Published in: 'Crypto volatility forecasting: ML vs GARCH' 4 | 5 | Description: 'Do a first econometrics analysis of btc log returns following Chen et al (2017) in `MLvsGARCHecon_1.R`, then build a rolling forecast with selected model `MLvsGARCHecon_2.R`' 6 | 7 | Keywords: 'econometrics, GARCH, EGARCH, volatility forecasting, realized volatility, cryptocurrency, btc' 8 | 9 | Author: 'Bruno Spilak' 10 | 11 | See also: 12 | - MLvsGARCHml 13 | - MLvsGARCHcomp 14 | - econ-tgarch 15 | - econ_arch 16 | - econ_arima 17 | - econ_ccgar 18 | - econ_crix 19 | - econ_garch 20 | - econ_vola 21 | 22 | Submitted: '12.07.2019' 23 | 24 | Datafile: 
'`../data/btc_1H_20160101_20190101.csv`: candle price of btc with 1 hour frequency' 25 | 26 | Input: 'The reader can modify the parameters of the model in each code file.' 27 | 28 | Output: 29 | - btc_log_returns.png 30 | - btc_qqplot.png 31 | - btc_qqplot_estgarch.png 32 | - saved_models 33 | -------------------------------------------------------------------------------- /MLvsGARCHecon/README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | Header Image 4 | 5 |
 6 | 
 7 | ```
 8 | Name of Quantlet: MLvsGARCHecon
 9 | 
10 | Published in: Crypto volatility forecasting: ML vs GARCH
11 | 
12 | Description: Do a first econometric analysis of btc log returns following Chen et al. (2017) in `MLvsGARCHecon_1.R`, then build a rolling forecast with the selected model in `MLvsGARCHecon_2.R`
13 | 
14 | Keywords: econometrics, GARCH, EGARCH, volatility forecasting, realized volatility, cryptocurrency, btc
15 | 
16 | Author: Bruno Spilak
17 | 
18 | See also:
19 | - MLvsGARCHml
20 | - MLvsGARCHcomp
21 | - econ-tgarch
22 | - econ_arch
23 | - econ_arima
24 | - econ_ccgar
25 | - econ_crix
26 | - econ_garch
27 | - econ_vola
28 | 
29 | Submitted: 12.07.2019
30 | 
31 | Datafile: `../data/btc_1H_20160101_20190101.csv`: candle price of btc at 1-hour frequency
32 | 
33 | Input: The reader can modify the parameters of the model in each code file.
34 | 
35 | Output:
36 | - btc_log_returns.png
37 | - btc_qqplot.png
38 | - btc_qqplot_estgarch.png
39 | - saved_models
40 | 
41 | ```
42 | 
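A minimal sketch of how the rolling forecast is launched (it mirrors the defaults in `MLvsGARCHecon.R`; the `saved_models` run directory containing `global_dates.json` is produced by a previous `MLvsGARCHml` run, so the directory name below is only a placeholder):

```
source("./core.R")

# hourly BTC log returns
ret = get_returns("../data/btc_1H_20160101_20190101.csv")

# ARMA(3,1) conditional mean, EGARCH(1,2) variance, skew-Student innovations
armaOrder = c(3, 1)
distribution = 'sstd'
n_train = 6 * 24 * 7 * 4  # six months of hourly observations

# cross-validation folds exported by the ML pipeline (placeholder run directory)
cv = rjson::fromJSON(file = "../MLvsGARCHml/saved_models/<run_dir>/global_dates.json")

results = cv_prediction(ret, cv, armaOrder, distribution, every = length(cv),
                        n_train = n_train, save = FALSE, comments = 'FINAL')
```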
43 | ![Image](https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/MLvsGARCHecon/btc_log_returns.png)
44 | 
45 | 
46 | 
47 | ![Image](https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/MLvsGARCHecon/btc_qqplot.png)
48 | 
49 | 
50 | 
51 | ![Image](https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/MLvsGARCHecon/btc_qqplot_estgarch.png)
52 | 
53 | 
54 | 
55 | ![Image](https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/MLvsGARCHecon/res_squared_p_acf.png)
56 | 
57 | 58 | -------------------------------------------------------------------------------- /MLvsGARCHecon/READMEEVT.md: -------------------------------------------------------------------------------- 1 | 'MLvsGARCHecon' 2 | 3 | Published in: 'Crypto volatility forecasting: ML vs GARCH' 4 | 5 | Description: 'Fit EVT-Garch to one hour btc returns' 6 | 7 | Keywords: 'econometrics, extreme value theory, GARCH, EGARCH, volatility forecasting, realized volatility, cryptocurrency, btc' 8 | 9 | * How to run: first_gpd.R 10 | * config.json: config file -------------------------------------------------------------------------------- /MLvsGARCHecon/btc_log_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/MLvsGARCHecon/btc_log_returns.png -------------------------------------------------------------------------------- /MLvsGARCHecon/btc_qqplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/MLvsGARCHecon/btc_qqplot.png -------------------------------------------------------------------------------- /MLvsGARCHecon/btc_qqplot_estgarch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/MLvsGARCHecon/btc_qqplot_estgarch.png -------------------------------------------------------------------------------- /MLvsGARCHecon/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "data_param": { 3 | "log": true, 4 | "normalization": { 5 | }, 6 | "data_path": "../data/btc_1H_20160101_20190101.csv" 7 | }, 8 | "training_param": { 9 | "window_size": 2880, 10 | "q_fit": 0.1 11 | }, 12 | "comments": "", 13 | "quantiles": [0.1], 14 | "cv_param": { 15 | "cv_test_start": 0.7, 16 | "cv_split": 10 17 | } 18 | } -------------------------------------------------------------------------------- /MLvsGARCHecon/core.R: -------------------------------------------------------------------------------- 1 | # install and load packages 2 | libraries = c( "timeSeries", "forecast", "fGarch", "bsts", "rugarch", "caret") #"tseries2 3 | lapply(libraries, function(x) if (!(x %in% installed.packages())) { 4 | install.packages(x) 5 | }) 6 | lapply(libraries, library, quietly = TRUE, character.only = TRUE) 7 | 8 | # load dataset 9 | 10 | get_returns = function(path){ 11 | data = read.csv(file=path, header=TRUE, sep=",", dec=".") 12 | close = data[1:nrow(data),"close"] 13 | dates = c(levels(data[1:nrow(data),"X"])) 14 | dates = as.POSIXct(dates, format="%Y-%m-%d %H:%M:%S", tz = "GMT") 15 | ret = data.frame("dates" = dates[2:length(dates)], "close" = diff(log(close))) 16 | ret = na.omit(ret) 17 | 18 | ret = timeSeries::as.timeSeries(data.frame("close"=ret$close, 19 | row.names=ret$dates), 20 | FinCenter = "GMT") 21 | 22 | ret = na.omit(ret) 23 | 24 | return (ret) 25 | } 26 | 27 | 28 | rolling_forecast = function(train_ret, test_ret, test_dates, armaOrder, every = 24, 29 | distribution = 'sstd', save_path=NULL){ 30 | # Initialization 31 | predsigma = c() 32 | predfitted = c() 33 | n_train = length(train_ret) 34 | close = c(train_ret, test_ret) 35 | length_test = length(test_ret) 36 | # Specify model: EGARCH(1,2) 37 | spec = ugarchspec(mean.model = list(armaOrder = armaOrder), 38 | variance.model = 
list(model = 'eGARCH', 39 | garchOrder = c(1,2)), 40 | distribution = distribution) 41 | print('Fit model') 42 | fit = ugarchfit(spec, close[1:n_train]) 43 | list_coefs = as.list(coef(fit)) 44 | # create a specification with fixed parameters: 45 | specf = ugarchspec(mean.model = list(armaOrder = armaOrder), 46 | variance.model = list(model = 'eGARCH', 47 | garchOrder = c(1,2)), 48 | distribution = distribution, fixed.pars = list_coefs) 49 | 50 | # we will also create the closed form forecast 51 | afor = matrix(NA, ncol = length_test, nrow = 2) 52 | rownames(afor) = c('Mu', 'Sigma') 53 | colnames(afor) = test_dates #paste('T+', 1:length_test, sep = '') 54 | # T+1 we can use ugarchsim: 55 | tmp = ugarchforecast(fit, n.ahead = 1) 56 | afor[, 1] = c(fitted(tmp), sigma(tmp)) 57 | # for T+(i>1): 58 | for (i in 2:length_test) { 59 | if (i%%100 == 0){ 60 | print(length_test - i) 61 | } 62 | tmp = ugarchforecast(specf, close[1:(n_train + i - 1)], n.ahead = 1) 63 | afor[, i] = c(fitted(tmp), sigma(tmp)) 64 | } 65 | #plot(abs(close[(n_train + 1):(n_train + length_test)]), type = 'l') 66 | #lines(afor[2,1:ncol(afor)], type='l', col = 'red') 67 | print('saving') 68 | write.csv(t(afor), paste0(save_path, '.csv'), row.names = TRUE) 69 | save(afor, file=paste0(save_path, '.RData')) 70 | write.csv(list_coefs, paste0(save_path, '_coef.csv'), row.names = FALSE) 71 | 72 | return_ = list(afor, fit, list_coefs) 73 | names(return_) <- c("afor", "fit", "coefs") 74 | return (return_) 75 | 76 | } 77 | 78 | 79 | cv_prediction = function(ret, cv, armaOrder, distribution, every, n_train = NULL, save = FALSE, comments = NULL){ 80 | dates = as.POSIXct(row.names(ret), format="%Y-%m-%d %H:%M:%S", tz = "GMT") 81 | results = c() 82 | time <- Sys.time() 83 | filepath = file.path("./saved_models", gsub(' ', '', gsub('-', '', gsub(':', '', time)))) 84 | filepath = paste0(paste0(filepath, '_'), comments) 85 | print(filepath) 86 | ifelse(!dir.exists("./saved_models/"), dir.create("./saved_models/"), FALSE) 87 | ifelse(!dir.exists(filepath), dir.create(filepath), FALSE) 88 | 89 | for (i in seq(0, (length(cv) - 1))) { 90 | print(i) 91 | start_time <- Sys.time() 92 | print( 93 | paste('FIT TO GO', as.integer(( length(cv) - (i+1) )) ) 94 | ) 95 | cv_i = paste0('cv_', i) 96 | train_dates_i = as.POSIXct(cv[[cv_i]]$train, format="%Y-%m-%d %H:%M:%S", tz = "GMT") 97 | train_ret = ret[dates %in% train_dates_i] 98 | 99 | if (!is.null(n_train)){ 100 | train_ret = train_ret[(length(train_ret) - n_train + 1): length(train_ret)] 101 | } 102 | print(length(train_ret)) 103 | test_dates_i = as.POSIXct(cv[[cv_i]]$date_test, format="%Y-%m-%d %H:%M:%S", tz = "GMT") 104 | test_ret = ret[dates %in% test_dates_i] 105 | 106 | savepath = paste0(paste0(filepath, '/cv'), i) 107 | print(savepath) 108 | returns_ = rolling_forecast(train_ret, 109 | test_ret, 110 | test_dates_i, 111 | armaOrder = armaOrder, 112 | every = every, 113 | distribution = distribution, 114 | save_path=savepath) 115 | end_time <- Sys.time() 116 | print(end_time - start_time) 117 | } 118 | 119 | if (save){ 120 | results = c(results, returns_) 121 | save(results, 122 | file = paste0(filepath, '/results.RData') 123 | ) 124 | } 125 | return (results) 126 | } -------------------------------------------------------------------------------- /MLvsGARCHecon/definition.R: -------------------------------------------------------------------------------- 1 | # VaR for a Generalized Pareto Distribution (GDP) 2 | var.normal = function(probs, mean, sd) 3 | { 4 | var = mean + sd * qnorm(p = probs) 5 | 
return(var) 6 | } 7 | 8 | # VaR for a Student t distribution 9 | var.student = function(probs, mean, sd, df) 10 | { 11 | scaling.factor = sqrt((df-2) / df) 12 | var = mean + sd * (scaling.factor * qt(p = probs, df = df) ) 13 | return(var) 14 | } 15 | 16 | # VaR for a Generalized Pareto Distribution (GDP) 17 | var.gpd = function(probs, threshold, scale, shape, n, Nu) 18 | { 19 | var = threshold + (scale / shape) * (((n / Nu) * (1 - probs)) ^ (-shape) - 1) 20 | return(var) 21 | } 22 | 23 | # ES for a normal distribution 24 | es.normal = function(probs, mean, sd) 25 | { 26 | es = mean + sd * (dnorm(x=qnorm(p=probs)) / (1-probs)) 27 | return(es) 28 | } 29 | 30 | # ES for a Student t distribution 31 | es.student = function(probs, mean, sd, df) 32 | { 33 | scaling.factor = sqrt((df-2)/df) 34 | factor1 = dt(x=qt(p=probs, df=df), df=df) / (1-probs) 35 | factor2 = (df + (qt(p=probs, df=df))^2 ) / (df-1) 36 | es = mean + sd * scaling.factor * factor1 * factor2 37 | return(es) 38 | } 39 | 40 | # ES for a GPD 41 | es.gpd = function(var, threshold, scale, shape) 42 | { 43 | es = var / (1-shape) + (scale - shape * threshold) / (1-shape) 44 | return(es) 45 | } 46 | 47 | 48 | # Tail proba for a GPD 49 | tail.gpd = function(x, threshold, scale, shape, n, Nu) 50 | { 51 | proba = Nu/n * ( 1 + shape * (x - threshold)/scale )^(-1/shape) 52 | return(proba) 53 | } -------------------------------------------------------------------------------- /MLvsGARCHecon/res_squared_p_acf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/MLvsGARCHecon/res_squared_p_acf.png -------------------------------------------------------------------------------- /MLvsGARCHecon/tools.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import datetime as dt 3 | 4 | 5 | def load_econ_pred(path='./saved_models/20190922155918or_prediction_10per_proba.csv', qs = [0.9], ret_std = True): 6 | df = pd.read_csv(path, index_col = 0) 7 | df.index = pd.to_datetime(df.index) 8 | 9 | # Compute var - u and var breaks 10 | q_columns = ['threshold', 'evt_var', 'evt_es', 'var', 'es', 'mean', 'zq'] 11 | df_columns = [] 12 | for q in qs: 13 | df_columns.append([c + '_' + str(q) for c in q_columns]) 14 | df_columns = [c for cl in df_columns for c in cl] 15 | 16 | if ret_std: 17 | for c in df_columns + ['std_losses']: 18 | df.loc[:, c] = - df.loc[:, c] * df['norm_sd'] 19 | else: 20 | for c in df_columns: 21 | df.loc[:, c] = - df.loc[:, c] * df['norm_sd'] 22 | df.loc[:, 'std_losses'] = - df.loc[:, 'std_losses'] 23 | 24 | for c in ['sd_' + str(q) for q in qs]: 25 | df.loc[:, c] = df.loc[:, c] * df['norm_sd'] 26 | 27 | df.columns = ['returns'] + list(df.columns)[1:] 28 | 29 | return df 30 | 31 | -------------------------------------------------------------------------------- /MLvsGARCHml/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from constant import QS_NAME, W_NAME 4 | 5 | 6 | def get_dl_perf_df(path): 7 | dl_result = pd.read_pickle('%s/dl_result.p' % path) 8 | qs_name = ['01', '025', '05', '10'] 9 | ws_name = ['woneday', 'w4months', 'w6months'] 10 | compdf = dl_result[(0.01, 24)]['df'][['returns']].copy() 11 | 12 | for w in ws_name: 13 | wdf = pd.DataFrame() 14 | for q in qs_name: 15 | usualc = '%s_0.%s' % (w, q) 16 | 17 | qdf = dl_result[(QS_NAME[q], 
W_NAME[w])]['df'].drop('returns', 1) 18 | qdf.columns = ['%s_%s' % (c, usualc) for c in 19 | qdf.columns] # [w + '_' + 'q0.%s_' % q + c for c in qdf.columns] 20 | wdf = pd.concat([wdf, qdf], 1) 21 | 22 | if q == '10': 23 | c = '%s_0.1' % w 24 | else: 25 | c = '%s_0.%s' % (w, q) 26 | 27 | compdf = pd.concat([compdf, wdf], 1) 28 | compdf = compdf.loc[:, ~compdf.columns.duplicated()] 29 | 30 | if np.sum(compdf.isna().sum()) != 0: 31 | print('Warning: There are NaNs in compdf...') 32 | print('Dropping NaNs...') 33 | compdf.dropna(inplace=True) 34 | 35 | return compdf 36 | -------------------------------------------------------------------------------- /Metainfo.txt: -------------------------------------------------------------------------------- 1 | Name of Quantlet: 'MLvsGARCH' 2 | 3 | Published in: 'Book: Encyclopedia of Finance. DOI : 10.1007/978-3-030-73443-5 Chapter Title : Tail-Risk Protection: Machine Learning Meets Modern Econometrics.' 4 | 5 | Description: 'We provide results for "Tail-risk protection: Machine Learning meets modern Econometrics", Spilak, WK Härdle (2020). Please refer to README2.md for a detailed description on how to use the code.' 6 | 7 | Keywords: 'tail-risk, trading strategy, cryptocurrency, Value-At-Risk, deep learning, machine learning, econometrics, extreme value theory, exceedance probability' 8 | 9 | Author: 'Bruno Spilak' 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | Header Image 4 | 5 |
6 | 7 | ``` 8 | Name of Quantlet: MLvsGARCH 9 | 10 | Published in: Book: Encyclopedia of Finance. DOI : 10.1007/978-3-030-73443-5 Chapter Title : Tail-Risk Protection: Machine Learning Meets Modern Econometrics. 11 | 12 | Description: We provide results for "Tail-risk protection: Machine Learning meets modern Econometrics", Spilak, WK Härdle (2020). Please refer to README2.md for a detailed description on how to use the code. 13 | 14 | Keywords: tail-risk, trading strategy, cryptocurrency, Value-At-Risk, deep learning, machine learning, econometrics, extreme value theory, exceedance probability 15 | 16 | Author: Bruno Spilak 17 | 18 | ``` 19 |
20 | ![Image](https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/min_tpr_exceedance_alpha.png)
21 | 
22 | 
23 | 
24 | ![Image](https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/strat_excess_perf_downtrend.png)
25 | 
26 | 
27 | 
28 | ![Image](https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/strat_excess_perf_uptrend.png)
29 | 
30 | 
31 | 
32 | ![Image](https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/strat_historical_perf.png)
33 | 
34 | 
35 | 
36 | ![Image](https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/strat_switch_ens_varspread.png)
37 | 
38 | 
39 | 
40 | ![Image](https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/tvar_0.01_w.png)
41 | 
42 | 
43 | 
--------------------------------------------------------------------------------
/README2.md:
--------------------------------------------------------------------------------
 1 | [Visit QuantNet](http://quantlet.de/index.php?p=info)
 2 | 
 3 | # Abstract of the work:
 4 | We provide results for "Tail-risk protection: Machine Learning meets modern Econometrics", Spilak, WK Härdle (2020).
 5 | 
 6 | # Installation:
 7 | 
 8 | To be able to run the Python code, the user must install virtualenv (https://virtualenv.pypa.io/en/latest/installation/), create a virtual environment with Python 3 and install the requirements:
 9 | 
10 | ```
11 | pip install virtualenv
12 | virtualenv -p python3 venv
13 | source venv/bin/activate
14 | pip install -r requirements.txt
15 | ```
16 | 
17 | # [Visit QuantNet](http://quantlet.de/) **[MLvsGARCHecon](MLvsGARCHecon)** [Visit QuantNet 2.0](http://quantlet.de/)
18 | 
19 | 
20 | In this repo, we first do a time series analysis of btc log returns in order to find the best parameters following the Box-Jenkins method (*ARIMA(3,0,1)-EskewtGARCH(1,2)*).
21 | 
22 | Then, we build a rolling forecast on the validation set with a refit parameter. The user can modify the parameters directly in the code.
23 | 
24 | 
25 | # Plots
26 | 
27 | We present here all plots in the paper, for example:
28 | 
29 | ## Historical performance
30 | 
31 | ![Picture1](strat_historical_perf.png)
32 | 
33 | ## Constraints
34 | 
35 | ![Picture2](min_tpr_exceedance_alpha.png)
--------------------------------------------------------------------------------
/Table 3-4-5-6.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "import pandas as pd\n",
10 |     "import numpy as np\n",
11 |     "import matplotlib.pyplot as plt\n",
12 |     "%matplotlib inline\n",
13 |     "from result.utils import qs_from_name, get_col_from_wq, get_boxdata\n",
14 |     "from result.compare import get_table_report\n",
15 |     "import pickle\n",
16 |     "\n",
17 |     "save = False\n",
18 |     "legend = False\n",
19 |     "\n",
20 |     "# constants\n",
21 |     "idx = pd.IndexSlice\n",
22 |     "classifiers = ['mlp', 'lstm', 'garch', 'evtgarch', 'lpa', 'carl', 'ensemble']\n",
23 |     "order_cols = ['btc'] + classifiers + ['var_norm', 'var_evt', 'varspread']\n",
24 |     "ws_name = ['woneday', 'w4months', 'w6months']\n",
25 |     "qs_name = ['01', '025', '05', '10']\n",
26 |     "\n",
27 |     "\n",
28 |     "# Get result table\n",
29 |     "compdf = pickle.load(open('hist_performance.p', 'rb'))"
30 |    ]
31 |   },
32 |   {
33 |    "cell_type": "markdown",
34 |    "metadata": {},
35 |    "source": [
36 |     "# Table report"
37 |    ]
38 |   },
39 |   {
40 |    "cell_type": "code",
41 |    "execution_count": 2,
42 |    "metadata": {
43 |     "scrolled": true
44 |    },
45 |    "outputs": [
46 |     {
47 |      "name": "stderr",
48 |      "output_type": "stream",
49 |      "text": [
50 |       "/Users/brunospilak/Documents/HU/MLvsGARCH/result/utils.py:18: FutureWarning: \n",
51 |       "Passing list-likes to .loc or [] with any missing label will raise\n",
52 |       "KeyError in the future, you can use .reindex() as an alternative.\n",
53 |       "\n",
54 |       "See the documentation here:\n",
55 |       "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n",
56 |       " daily_data = daily_data.loc[pd.date_range(daily_data.index[0], daily_data.index[-1], freq='D')]\n"
57 |      ]
58 |     },
59 |     {
60 |      "name": "stdout",
61 |      "output_type": "stream",
62 |      "text": [
63 |       "total return\n",
64 |       "Avg 
return\n", 65 | "Avg return classes 0 and 1\n", 66 | "Volatility\n", 67 | "Excess sharpe ratio\n", 68 | "Sharpe ratio\n", 69 | "Exceedance\n", 70 | "Value-At-Risk and status\n", 71 | "Max DrawDown\n", 72 | "Sortino ratio\n", 73 | "Brier score\n", 74 | "Cross entropy\n", 75 | "TPR and TNR\n", 76 | "F score\n", 77 | "AUC score\n", 78 | "Risk-adjusted AUC\n", 79 | "Respected constraint\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "table = get_table_report(compdf, qs_name, ws_name, varspread=True, carl=True, lpa=True, \n", 85 | " ensemble=True, var_norm = True, var_evt = True, mlp=True)\n", 86 | "if save:\n", 87 | " table = pickle.dump(table, open('test_table.p', 'wb'))" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "## Table 3: Risk-adjusted AUC on the test set" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/html": [ 105 | "
\n", 106 | "\n", 119 | "\n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | "
mlplstmgarchevtgarchlpacarlensemble
risk_aucrisk_aucrisk_aucrisk_aucrisk_aucrisk_aucrisk_auc
0.0102469697373717173
288062676972726974
432070697072727474
0.0252466697070696971
288072717474767277
432071737474757377
0.0502465656666656667
288071707474757376
432070717473757376
0.1002463636363606264
288067667070717172
432066686970707171
\n", 271 | "
" 272 | ], 273 | "text/plain": [ 274 | " mlp lstm garch evtgarch lpa carl ensemble\n", 275 | " risk_auc risk_auc risk_auc risk_auc risk_auc risk_auc risk_auc\n", 276 | "0.010 24 69 69 73 73 71 71 73\n", 277 | " 2880 62 67 69 72 72 69 74\n", 278 | " 4320 70 69 70 72 72 74 74\n", 279 | "0.025 24 66 69 70 70 69 69 71\n", 280 | " 2880 72 71 74 74 76 72 77\n", 281 | " 4320 71 73 74 74 75 73 77\n", 282 | "0.050 24 65 65 66 66 65 66 67\n", 283 | " 2880 71 70 74 74 75 73 76\n", 284 | " 4320 70 71 74 73 75 73 76\n", 285 | "0.100 24 63 63 63 63 60 62 64\n", 286 | " 2880 67 66 70 70 71 71 72\n", 287 | " 4320 66 68 69 70 70 71 71" 288 | ] 289 | }, 290 | "execution_count": 3, 291 | "metadata": {}, 292 | "output_type": "execute_result" 293 | } 294 | ], 295 | "source": [ 296 | "(np.round(table[classifiers].loc[:, idx[:, 'risk_auc']]*100)).astype(int)#.idxmax(1)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "# cross validation result" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 4, 309 | "metadata": { 310 | "scrolled": true 311 | }, 312 | "outputs": [ 313 | { 314 | "name": "stdout", 315 | "output_type": "stream", 316 | "text": [ 317 | "cv_0\n", 318 | "total return\n" 319 | ] 320 | }, 321 | { 322 | "name": "stderr", 323 | "output_type": "stream", 324 | "text": [ 325 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 326 | "Passing list-likes to .loc or [] with any missing label will raise\n", 327 | "KeyError in the future, you can use .reindex() as an alternative.\n", 328 | "\n", 329 | "See the documentation here:\n", 330 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 331 | " \n" 332 | ] 333 | }, 334 | { 335 | "name": "stdout", 336 | "output_type": "stream", 337 | "text": [ 338 | "Avg return\n", 339 | "Avg return classes 0 and 1\n", 340 | "Volatility\n", 341 | "Excess sharpe ratio\n", 342 | "Sharpe ratio\n", 343 | "Exceedance\n", 344 | "Value-At-Risk and status\n", 345 | "Max DrawDown\n", 346 | "Sortino ratio\n", 347 | "Brier score\n", 348 | "Cross entropy\n", 349 | "TPR and TNR\n", 350 | "F score\n" 351 | ] 352 | }, 353 | { 354 | "name": "stderr", 355 | "output_type": "stream", 356 | "text": [ 357 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", 358 | " 'precision', 'predicted', average, warn_for)\n" 359 | ] 360 | }, 361 | { 362 | "name": "stdout", 363 | "output_type": "stream", 364 | "text": [ 365 | "AUC score\n", 366 | "Risk-adjusted AUC\n", 367 | "Respected constraint\n", 368 | "cv_1\n", 369 | "total return\n" 370 | ] 371 | }, 372 | { 373 | "name": "stderr", 374 | "output_type": "stream", 375 | "text": [ 376 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 377 | "Passing list-likes to .loc or [] with any missing label will raise\n", 378 | "KeyError in the future, you can use .reindex() as an alternative.\n", 379 | "\n", 380 | "See the documentation here:\n", 381 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 382 | " \n" 383 | ] 384 | }, 385 | { 386 | "name": "stdout", 387 | "output_type": "stream", 388 | "text": [ 389 | "Avg 
return\n", 390 | "Avg return classes 0 and 1\n", 391 | "Volatility\n", 392 | "Excess sharpe ratio\n", 393 | "Sharpe ratio\n" 394 | ] 395 | }, 396 | { 397 | "name": "stderr", 398 | "output_type": "stream", 399 | "text": [ 400 | "/Users/brunospilak/Documents/HU/MLvsGARCH/result/compare.py:75: RuntimeWarning: invalid value encountered in double_scalars\n", 401 | " return (returns - benchmark).mean() / (returns - benchmark).std() * np.sqrt(period)\n" 402 | ] 403 | }, 404 | { 405 | "name": "stdout", 406 | "output_type": "stream", 407 | "text": [ 408 | "Exceedance\n", 409 | "Value-At-Risk and status\n", 410 | "Max DrawDown\n", 411 | "Sortino ratio\n", 412 | "Brier score\n", 413 | "Cross entropy\n", 414 | "TPR and TNR\n", 415 | "F score\n" 416 | ] 417 | }, 418 | { 419 | "name": "stderr", 420 | "output_type": "stream", 421 | "text": [ 422 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", 423 | " 'precision', 'predicted', average, warn_for)\n" 424 | ] 425 | }, 426 | { 427 | "name": "stdout", 428 | "output_type": "stream", 429 | "text": [ 430 | "AUC score\n", 431 | "Risk-adjusted AUC\n", 432 | "Respected constraint\n", 433 | "cv_2\n", 434 | "total return\n" 435 | ] 436 | }, 437 | { 438 | "name": "stderr", 439 | "output_type": "stream", 440 | "text": [ 441 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 442 | "Passing list-likes to .loc or [] with any missing label will raise\n", 443 | "KeyError in the future, you can use .reindex() as an alternative.\n", 444 | "\n", 445 | "See the documentation here:\n", 446 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 447 | " \n" 448 | ] 449 | }, 450 | { 451 | "name": "stdout", 452 | "output_type": "stream", 453 | "text": [ 454 | "Avg return\n", 455 | "Avg return classes 0 and 1\n", 456 | "Volatility\n", 457 | "Excess sharpe ratio\n", 458 | "Sharpe ratio\n", 459 | "Exceedance\n", 460 | "Value-At-Risk and status\n", 461 | "Max DrawDown\n", 462 | "Sortino ratio\n", 463 | "Brier score\n", 464 | "Cross entropy\n", 465 | "TPR and TNR\n", 466 | "F score\n" 467 | ] 468 | }, 469 | { 470 | "name": "stderr", 471 | "output_type": "stream", 472 | "text": [ 473 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", 474 | " 'precision', 'predicted', average, warn_for)\n" 475 | ] 476 | }, 477 | { 478 | "name": "stdout", 479 | "output_type": "stream", 480 | "text": [ 481 | "AUC score\n", 482 | "Risk-adjusted AUC\n", 483 | "Respected constraint\n", 484 | "cv_3\n", 485 | "total return\n" 486 | ] 487 | }, 488 | { 489 | "name": "stderr", 490 | "output_type": "stream", 491 | "text": [ 492 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 493 | "Passing list-likes to .loc or [] with any missing label will raise\n", 494 | "KeyError in the future, you can use .reindex() as an alternative.\n", 495 | "\n", 496 | "See the documentation here:\n", 497 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 498 | " \n", 499 | 
"/Users/brunospilak/Documents/HU/MLvsGARCH/result/utils.py:18: FutureWarning: \n", 500 | "Passing list-likes to .loc or [] with any missing label will raise\n", 501 | "KeyError in the future, you can use .reindex() as an alternative.\n", 502 | "\n", 503 | "See the documentation here:\n", 504 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 505 | " daily_data = daily_data.loc[pd.date_range(daily_data.index[0], daily_data.index[-1], freq='D')]\n" 506 | ] 507 | }, 508 | { 509 | "name": "stdout", 510 | "output_type": "stream", 511 | "text": [ 512 | "Avg return\n", 513 | "Avg return classes 0 and 1\n", 514 | "Volatility\n", 515 | "Excess sharpe ratio\n", 516 | "Sharpe ratio\n", 517 | "Exceedance\n", 518 | "Value-At-Risk and status\n", 519 | "Max DrawDown\n", 520 | "Sortino ratio\n", 521 | "Brier score\n", 522 | "Cross entropy\n", 523 | "TPR and TNR\n", 524 | "F score\n" 525 | ] 526 | }, 527 | { 528 | "name": "stderr", 529 | "output_type": "stream", 530 | "text": [ 531 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", 532 | " 'precision', 'predicted', average, warn_for)\n" 533 | ] 534 | }, 535 | { 536 | "name": "stdout", 537 | "output_type": "stream", 538 | "text": [ 539 | "AUC score\n", 540 | "Risk-adjusted AUC\n", 541 | "Respected constraint\n", 542 | "cv_4\n", 543 | "total return\n" 544 | ] 545 | }, 546 | { 547 | "name": "stderr", 548 | "output_type": "stream", 549 | "text": [ 550 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 551 | "Passing list-likes to .loc or [] with any missing label will raise\n", 552 | "KeyError in the future, you can use .reindex() as an alternative.\n", 553 | "\n", 554 | "See the documentation here:\n", 555 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 556 | " \n" 557 | ] 558 | }, 559 | { 560 | "name": "stdout", 561 | "output_type": "stream", 562 | "text": [ 563 | "Avg return\n", 564 | "Avg return classes 0 and 1\n", 565 | "Volatility\n", 566 | "Excess sharpe ratio\n", 567 | "Sharpe ratio\n", 568 | "Exceedance\n", 569 | "Value-At-Risk and status\n", 570 | "Max DrawDown\n", 571 | "Sortino ratio\n", 572 | "Brier score\n", 573 | "Cross entropy\n", 574 | "TPR and TNR\n", 575 | "F score\n" 576 | ] 577 | }, 578 | { 579 | "name": "stderr", 580 | "output_type": "stream", 581 | "text": [ 582 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", 583 | " 'precision', 'predicted', average, warn_for)\n" 584 | ] 585 | }, 586 | { 587 | "name": "stdout", 588 | "output_type": "stream", 589 | "text": [ 590 | "AUC score\n", 591 | "Risk-adjusted AUC\n", 592 | "Respected constraint\n", 593 | "cv_5\n", 594 | "total return\n" 595 | ] 596 | }, 597 | { 598 | "name": "stderr", 599 | "output_type": "stream", 600 | "text": [ 601 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 602 | "Passing list-likes to .loc or [] with any missing label will raise\n", 603 | "KeyError in the future, you can use .reindex() as an 
alternative.\n", 604 | "\n", 605 | "See the documentation here:\n", 606 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 607 | " \n" 608 | ] 609 | }, 610 | { 611 | "name": "stdout", 612 | "output_type": "stream", 613 | "text": [ 614 | "Avg return\n", 615 | "Avg return classes 0 and 1\n", 616 | "Volatility\n", 617 | "Excess sharpe ratio\n", 618 | "Sharpe ratio\n", 619 | "Exceedance\n", 620 | "Value-At-Risk and status\n", 621 | "Max DrawDown\n", 622 | "Sortino ratio\n", 623 | "Brier score\n", 624 | "Cross entropy\n", 625 | "TPR and TNR\n", 626 | "F score\n" 627 | ] 628 | }, 629 | { 630 | "name": "stderr", 631 | "output_type": "stream", 632 | "text": [ 633 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", 634 | " 'precision', 'predicted', average, warn_for)\n" 635 | ] 636 | }, 637 | { 638 | "name": "stdout", 639 | "output_type": "stream", 640 | "text": [ 641 | "AUC score\n", 642 | "Risk-adjusted AUC\n", 643 | "Respected constraint\n", 644 | "cv_6\n", 645 | "total return\n" 646 | ] 647 | }, 648 | { 649 | "name": "stderr", 650 | "output_type": "stream", 651 | "text": [ 652 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 653 | "Passing list-likes to .loc or [] with any missing label will raise\n", 654 | "KeyError in the future, you can use .reindex() as an alternative.\n", 655 | "\n", 656 | "See the documentation here:\n", 657 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 658 | " \n" 659 | ] 660 | }, 661 | { 662 | "name": "stdout", 663 | "output_type": "stream", 664 | "text": [ 665 | "Avg return\n", 666 | "Avg return classes 0 and 1\n", 667 | "Volatility\n", 668 | "Excess sharpe ratio\n", 669 | "Sharpe ratio\n", 670 | "Exceedance\n", 671 | "Value-At-Risk and status\n", 672 | "Max DrawDown\n", 673 | "Sortino ratio\n", 674 | "Brier score\n", 675 | "Cross entropy\n", 676 | "TPR and TNR\n", 677 | "F score\n" 678 | ] 679 | }, 680 | { 681 | "name": "stderr", 682 | "output_type": "stream", 683 | "text": [ 684 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", 685 | " 'precision', 'predicted', average, warn_for)\n" 686 | ] 687 | }, 688 | { 689 | "name": "stdout", 690 | "output_type": "stream", 691 | "text": [ 692 | "AUC score\n", 693 | "Risk-adjusted AUC\n", 694 | "Respected constraint\n", 695 | "cv_7\n", 696 | "total return\n" 697 | ] 698 | }, 699 | { 700 | "name": "stderr", 701 | "output_type": "stream", 702 | "text": [ 703 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 704 | "Passing list-likes to .loc or [] with any missing label will raise\n", 705 | "KeyError in the future, you can use .reindex() as an alternative.\n", 706 | "\n", 707 | "See the documentation here:\n", 708 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 709 | " \n" 710 | ] 711 | }, 712 | { 713 | "name": "stdout", 714 | "output_type": "stream", 715 | "text": [ 716 | "Avg return\n", 717 | "Avg return 
classes 0 and 1\n", 718 | "Volatility\n", 719 | "Excess sharpe ratio\n", 720 | "Sharpe ratio\n", 721 | "Exceedance\n", 722 | "Value-At-Risk and status\n", 723 | "Max DrawDown\n", 724 | "Sortino ratio\n", 725 | "Brier score\n", 726 | "Cross entropy\n", 727 | "TPR and TNR\n", 728 | "F score\n" 729 | ] 730 | }, 731 | { 732 | "name": "stderr", 733 | "output_type": "stream", 734 | "text": [ 735 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", 736 | " 'precision', 'predicted', average, warn_for)\n" 737 | ] 738 | }, 739 | { 740 | "name": "stdout", 741 | "output_type": "stream", 742 | "text": [ 743 | "AUC score\n", 744 | "Risk-adjusted AUC\n", 745 | "Respected constraint\n", 746 | "cv_8\n", 747 | "total return\n" 748 | ] 749 | }, 750 | { 751 | "name": "stderr", 752 | "output_type": "stream", 753 | "text": [ 754 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 755 | "Passing list-likes to .loc or [] with any missing label will raise\n", 756 | "KeyError in the future, you can use .reindex() as an alternative.\n", 757 | "\n", 758 | "See the documentation here:\n", 759 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 760 | " \n" 761 | ] 762 | }, 763 | { 764 | "name": "stdout", 765 | "output_type": "stream", 766 | "text": [ 767 | "Avg return\n", 768 | "Avg return classes 0 and 1\n", 769 | "Volatility\n", 770 | "Excess sharpe ratio\n", 771 | "Sharpe ratio\n", 772 | "Exceedance\n", 773 | "Value-At-Risk and status\n", 774 | "Max DrawDown\n", 775 | "Sortino ratio\n", 776 | "Brier score\n", 777 | "Cross entropy\n", 778 | "TPR and TNR\n", 779 | "F score\n", 780 | "AUC score\n", 781 | "Risk-adjusted AUC\n", 782 | "Respected constraint\n", 783 | "cv_9\n", 784 | "total return\n" 785 | ] 786 | }, 787 | { 788 | "name": "stderr", 789 | "output_type": "stream", 790 | "text": [ 791 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 792 | "Passing list-likes to .loc or [] with any missing label will raise\n", 793 | "KeyError in the future, you can use .reindex() as an alternative.\n", 794 | "\n", 795 | "See the documentation here:\n", 796 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 797 | " \n" 798 | ] 799 | }, 800 | { 801 | "name": "stdout", 802 | "output_type": "stream", 803 | "text": [ 804 | "Avg return\n", 805 | "Avg return classes 0 and 1\n", 806 | "Volatility\n", 807 | "Excess sharpe ratio\n", 808 | "Sharpe ratio\n", 809 | "Exceedance\n", 810 | "Value-At-Risk and status\n", 811 | "Max DrawDown\n", 812 | "Sortino ratio\n", 813 | "Brier score\n", 814 | "Cross entropy\n", 815 | "TPR and TNR\n", 816 | "F score\n" 817 | ] 818 | }, 819 | { 820 | "name": "stderr", 821 | "output_type": "stream", 822 | "text": [ 823 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", 824 | " 'precision', 'predicted', average, warn_for)\n" 825 | ] 826 | }, 827 | { 828 | "name": "stdout", 829 | "output_type": "stream", 830 | "text": [ 831 | "AUC score\n", 832 | 
"Risk-adjusted AUC\n", 833 | "Respected constraint\n", 834 | "cv_10\n", 835 | "total return\n" 836 | ] 837 | }, 838 | { 839 | "name": "stderr", 840 | "output_type": "stream", 841 | "text": [ 842 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 843 | "Passing list-likes to .loc or [] with any missing label will raise\n", 844 | "KeyError in the future, you can use .reindex() as an alternative.\n", 845 | "\n", 846 | "See the documentation here:\n", 847 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 848 | " \n", 849 | "/Users/brunospilak/Documents/HU/MLvsGARCH/result/utils.py:18: FutureWarning: \n", 850 | "Passing list-likes to .loc or [] with any missing label will raise\n", 851 | "KeyError in the future, you can use .reindex() as an alternative.\n", 852 | "\n", 853 | "See the documentation here:\n", 854 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 855 | " daily_data = daily_data.loc[pd.date_range(daily_data.index[0], daily_data.index[-1], freq='D')]\n" 856 | ] 857 | }, 858 | { 859 | "name": "stdout", 860 | "output_type": "stream", 861 | "text": [ 862 | "Avg return\n", 863 | "Avg return classes 0 and 1\n", 864 | "Volatility\n", 865 | "Excess sharpe ratio\n", 866 | "Sharpe ratio\n", 867 | "Exceedance\n", 868 | "Value-At-Risk and status\n", 869 | "Max DrawDown\n", 870 | "Sortino ratio\n", 871 | "Brier score\n", 872 | "Cross entropy\n", 873 | "TPR and TNR\n", 874 | "F score\n", 875 | "AUC score\n", 876 | "Risk-adjusted AUC\n", 877 | "Respected constraint\n", 878 | "cv_11\n", 879 | "total return\n" 880 | ] 881 | }, 882 | { 883 | "name": "stderr", 884 | "output_type": "stream", 885 | "text": [ 886 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/ipykernel_launcher.py:6: FutureWarning: \n", 887 | "Passing list-likes to .loc or [] with any missing label will raise\n", 888 | "KeyError in the future, you can use .reindex() as an alternative.\n", 889 | "\n", 890 | "See the documentation here:\n", 891 | "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n", 892 | " \n" 893 | ] 894 | }, 895 | { 896 | "name": "stdout", 897 | "output_type": "stream", 898 | "text": [ 899 | "Avg return\n", 900 | "Avg return classes 0 and 1\n", 901 | "Volatility\n", 902 | "Excess sharpe ratio\n", 903 | "Sharpe ratio\n", 904 | "Exceedance\n", 905 | "Value-At-Risk and status\n", 906 | "Max DrawDown\n", 907 | "Sortino ratio\n", 908 | "Brier score\n", 909 | "Cross entropy\n", 910 | "TPR and TNR\n", 911 | "F score\n" 912 | ] 913 | }, 914 | { 915 | "name": "stderr", 916 | "output_type": "stream", 917 | "text": [ 918 | "/Users/brunospilak/Documents/HU/bruno_phd/MLvsGARCH/mlvsgarch_env/lib/python3.6/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", 919 | " 'precision', 'predicted', average, warn_for)\n" 920 | ] 921 | }, 922 | { 923 | "name": "stdout", 924 | "output_type": "stream", 925 | "text": [ 926 | "AUC score\n", 927 | "Risk-adjusted AUC\n", 928 | "Respected constraint\n" 929 | ] 930 | } 931 | ], 932 | "source": [ 933 | "# load cv dates\n", 934 | "cvdates = pickle.load(open('./result/cvdates.p', 'rb'))\n", 935 | "cvtable = {}\n", 936 | "for k in cvdates.keys():\n", 937 | " print(k)\n", 938 | " cvtable[k] = 
get_table_report(compdf.loc[cvdates[k]].dropna(), qs_name, ws_name, varspread=True, carl=True, lpa=True, \n", 939 | " ensemble=True, var_norm = True, var_evt = True, mlp=True)\n", 940 | " new_ind = pd.MultiIndex.from_product([[0.01, 0.025, 0.05, 0.1], [24, 2880, 4320]])\n", 941 | " cvtable[k].index = new_ind\n", 942 | "if save:\n", 943 | " pickle.dump(cvtable, open('cv_table.p', 'wb'))" 944 | ] 945 | }, 946 | { 947 | "cell_type": "markdown", 948 | "metadata": {}, 949 | "source": [ 950 | "## Table 4: Average risk-adjusted AUC and variance risk-adjusted AUC over the testing periods" 951 | ] 952 | }, 953 | { 954 | "cell_type": "code", 955 | "execution_count": 5, 956 | "metadata": {}, 957 | "outputs": [ 958 | { 959 | "data": { 960 | "text/html": [ 961 | "
\n", 962 | "\n", 975 | "\n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | "
mlplstmgarchevtgarchlpacarlensemble
meanvarmeanvarmeanvarmeanvarmeanvarmeanvarmeanvar
0.0102470.10.369.70.473.70.173.50.171.50.272.20.273.40.2
288058.53.363.83.960.00.960.71.361.21.455.31.061.61.8
432062.83.763.43.157.51.261.61.059.21.357.40.859.91.6
0.0252467.90.470.40.170.80.170.90.168.80.169.60.271.30.1
288065.80.965.11.361.61.361.81.263.61.257.11.366.01.1
432063.40.665.42.061.61.462.21.661.11.256.61.365.01.6
0.0502466.60.466.80.267.50.167.40.164.90.266.20.267.80.2
288064.70.764.20.662.30.861.71.063.10.860.71.163.61.0
432065.80.564.50.762.40.961.21.063.00.960.10.863.71.0
0.1002464.00.264.40.264.40.164.40.160.60.162.60.265.20.2
288061.50.763.10.861.10.861.30.860.50.760.50.762.10.8
432062.10.764.20.761.20.761.30.760.50.860.40.661.90.9
\n", 1218 | "
" 1219 | ], 1220 | "text/plain": [ 1221 | " mlp lstm garch evtgarch lpa carl \\\n", 1222 | " mean var mean var mean var mean var mean var mean \n", 1223 | "0.010 24 70.1 0.3 69.7 0.4 73.7 0.1 73.5 0.1 71.5 0.2 72.2 \n", 1224 | " 2880 58.5 3.3 63.8 3.9 60.0 0.9 60.7 1.3 61.2 1.4 55.3 \n", 1225 | " 4320 62.8 3.7 63.4 3.1 57.5 1.2 61.6 1.0 59.2 1.3 57.4 \n", 1226 | "0.025 24 67.9 0.4 70.4 0.1 70.8 0.1 70.9 0.1 68.8 0.1 69.6 \n", 1227 | " 2880 65.8 0.9 65.1 1.3 61.6 1.3 61.8 1.2 63.6 1.2 57.1 \n", 1228 | " 4320 63.4 0.6 65.4 2.0 61.6 1.4 62.2 1.6 61.1 1.2 56.6 \n", 1229 | "0.050 24 66.6 0.4 66.8 0.2 67.5 0.1 67.4 0.1 64.9 0.2 66.2 \n", 1230 | " 2880 64.7 0.7 64.2 0.6 62.3 0.8 61.7 1.0 63.1 0.8 60.7 \n", 1231 | " 4320 65.8 0.5 64.5 0.7 62.4 0.9 61.2 1.0 63.0 0.9 60.1 \n", 1232 | "0.100 24 64.0 0.2 64.4 0.2 64.4 0.1 64.4 0.1 60.6 0.1 62.6 \n", 1233 | " 2880 61.5 0.7 63.1 0.8 61.1 0.8 61.3 0.8 60.5 0.7 60.5 \n", 1234 | " 4320 62.1 0.7 64.2 0.7 61.2 0.7 61.3 0.7 60.5 0.8 60.4 \n", 1235 | "\n", 1236 | " ensemble \n", 1237 | " var mean var \n", 1238 | "0.010 24 0.2 73.4 0.2 \n", 1239 | " 2880 1.0 61.6 1.8 \n", 1240 | " 4320 0.8 59.9 1.6 \n", 1241 | "0.025 24 0.2 71.3 0.1 \n", 1242 | " 2880 1.3 66.0 1.1 \n", 1243 | " 4320 1.3 65.0 1.6 \n", 1244 | "0.050 24 0.2 67.8 0.2 \n", 1245 | " 2880 1.1 63.6 1.0 \n", 1246 | " 4320 0.8 63.7 1.0 \n", 1247 | "0.100 24 0.2 65.2 0.2 \n", 1248 | " 2880 0.7 62.1 0.8 \n", 1249 | " 4320 0.6 61.9 0.9 " 1250 | ] 1251 | }, 1252 | "execution_count": 5, 1253 | "metadata": {}, 1254 | "output_type": "execute_result" 1255 | } 1256 | ], 1257 | "source": [ 1258 | "metric = 'risk_auc'\n", 1259 | "cvperf = pd.DataFrame(columns = pd.MultiIndex.from_product([classifiers, ['mean', 'var']]), \n", 1260 | " index = table.index)\n", 1261 | "for qw in table.index:\n", 1262 | " boxdata = get_boxdata(cvtable, metric, classifiers, qw)\n", 1263 | " arcvperf = [[np.mean(b), np.var(b)] for b in boxdata]\n", 1264 | " for i,m in enumerate(classifiers):\n", 1265 | " cvperf.loc[qw][m] = arcvperf[i]\n", 1266 | "(cvperf.astype(float) * 100).round(1)" 1267 | ] 1268 | }, 1269 | { 1270 | "cell_type": "markdown", 1271 | "metadata": {}, 1272 | "source": [ 1273 | "# Backtest metrics" 1274 | ] 1275 | }, 1276 | { 1277 | "cell_type": "markdown", 1278 | "metadata": {}, 1279 | "source": [ 1280 | "## Table 5: Exceedances of tail risk protection strategies in %" 1281 | ] 1282 | }, 1283 | { 1284 | "cell_type": "code", 1285 | "execution_count": 6, 1286 | "metadata": {}, 1287 | "outputs": [ 1288 | { 1289 | "data": { 1290 | "text/html": [ 1291 | "
\n", 1292 | "\n", 1305 | "\n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | "
btcmlplstmgarchevtgarchlpacarlensemblevar_normvar_evtvarspread
0.010244.30.30.40.50.40.91.21.01.41.13.6
28801.00.60.70.80.60.90.40.70.50.50.7
43200.90.70.50.70.50.80.40.70.40.40.7
0.025245.21.51.81.21.02.12.01.81.81.94.3
28802.52.11.61.61.31.21.21.71.21.32.1
43202.22.01.30.90.80.81.11.71.01.02.0
0.050247.93.63.62.83.14.53.42.52.63.65.9
28804.94.03.22.32.42.32.53.52.32.83.4
43204.53.83.11.41.53.62.33.32.22.53.3
0.1002412.48.48.14.04.08.36.68.84.47.29.7
28809.67.97.25.65.65.15.77.54.46.17.2
43209.47.47.03.33.24.46.07.14.55.77.2
\n", 1498 | "
" 1499 | ], 1500 | "text/plain": [ 1501 | " btc mlp lstm garch evtgarch lpa carl ensemble var_norm \\\n", 1502 | "0.010 24 4.3 0.3 0.4 0.5 0.4 0.9 1.2 1.0 1.4 \n", 1503 | " 2880 1.0 0.6 0.7 0.8 0.6 0.9 0.4 0.7 0.5 \n", 1504 | " 4320 0.9 0.7 0.5 0.7 0.5 0.8 0.4 0.7 0.4 \n", 1505 | "0.025 24 5.2 1.5 1.8 1.2 1.0 2.1 2.0 1.8 1.8 \n", 1506 | " 2880 2.5 2.1 1.6 1.6 1.3 1.2 1.2 1.7 1.2 \n", 1507 | " 4320 2.2 2.0 1.3 0.9 0.8 0.8 1.1 1.7 1.0 \n", 1508 | "0.050 24 7.9 3.6 3.6 2.8 3.1 4.5 3.4 2.5 2.6 \n", 1509 | " 2880 4.9 4.0 3.2 2.3 2.4 2.3 2.5 3.5 2.3 \n", 1510 | " 4320 4.5 3.8 3.1 1.4 1.5 3.6 2.3 3.3 2.2 \n", 1511 | "0.100 24 12.4 8.4 8.1 4.0 4.0 8.3 6.6 8.8 4.4 \n", 1512 | " 2880 9.6 7.9 7.2 5.6 5.6 5.1 5.7 7.5 4.4 \n", 1513 | " 4320 9.4 7.4 7.0 3.3 3.2 4.4 6.0 7.1 4.5 \n", 1514 | "\n", 1515 | " var_evt varspread \n", 1516 | "0.010 24 1.1 3.6 \n", 1517 | " 2880 0.5 0.7 \n", 1518 | " 4320 0.4 0.7 \n", 1519 | "0.025 24 1.9 4.3 \n", 1520 | " 2880 1.3 2.1 \n", 1521 | " 4320 1.0 2.0 \n", 1522 | "0.050 24 3.6 5.9 \n", 1523 | " 2880 2.8 3.4 \n", 1524 | " 4320 2.5 3.3 \n", 1525 | "0.100 24 7.2 9.7 \n", 1526 | " 2880 6.1 7.2 \n", 1527 | " 4320 5.7 7.2 " 1528 | ] 1529 | }, 1530 | "execution_count": 6, 1531 | "metadata": {}, 1532 | "output_type": "execute_result" 1533 | } 1534 | ], 1535 | "source": [ 1536 | "stat = (table[order_cols].loc[:, idx[:, 'exceedance']] * 100).astype(float).round(1).droplevel(1,1)\n", 1537 | "if save:\n", 1538 | " stat.to_csv('exceedance.csv')\n", 1539 | "stat" 1540 | ] 1541 | }, 1542 | { 1543 | "cell_type": "markdown", 1544 | "metadata": {}, 1545 | "source": [ 1546 | "## Table 6: Average return in %" 1547 | ] 1548 | }, 1549 | { 1550 | "cell_type": "code", 1551 | "execution_count": 8, 1552 | "metadata": {}, 1553 | "outputs": [ 1554 | { 1555 | "data": { 1556 | "text/html": [ 1557 | "
\n", 1558 | "\n", 1571 | "\n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | " \n", 1695 | " \n", 1696 | " \n", 1697 | " \n", 1698 | " \n", 1699 | " \n", 1700 | " \n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | "
btcmlplstmgarchevtgarchlpacarlensemblevar_normvar_evtvarspread
0.010241.0e-024.9e-035.1e-032.3e-031.6e-036.5e-03-1.5e-038.9e-036.5e-036.3e-034.0e-03
28801.0e-028.0e-035.5e-031.0e-024.8e-038.6e-031.7e-024.4e-037.1e-036.3e-034.0e-03
43201.0e-028.1e-031.1e-028.8e-037.0e-039.5e-031.2e-021.4e-028.2e-037.1e-034.0e-03
0.025241.0e-027.3e-037.1e-033.4e-036.3e-038.9e-03-6.7e-046.8e-037.0e-037.5e-033.7e-03
28801.0e-026.9e-039.9e-033.6e-031.4e-032.2e-031.2e-028.5e-036.4e-036.3e-033.7e-03
43201.0e-025.8e-031.0e-025.3e-034.5e-036.3e-031.2e-028.3e-037.4e-037.5e-033.7e-03
0.050241.0e-027.2e-039.2e-035.8e-036.7e-039.3e-032.9e-037.1e-037.8e-038.3e-032.7e-03
28801.0e-026.8e-031.3e-027.9e-041.4e-032.7e-031.3e-028.7e-035.9e-036.2e-032.7e-03
43201.0e-028.5e-031.2e-021.5e-032.1e-035.1e-031.6e-021.1e-026.5e-037.2e-032.7e-03
0.100241.0e-025.7e-039.6e-035.5e-036.1e-032.8e-036.9e-036.9e-035.9e-037.8e-032.4e-03
28801.0e-021.0e-027.5e-031.6e-031.8e-035.5e-031.4e-026.8e-035.5e-036.3e-032.4e-03
43201.0e-021.4e-029.8e-033.8e-034.1e-035.5e-031.2e-028.1e-035.8e-037.3e-032.4e-03
\n", 1764 | "
" 1765 | ], 1766 | "text/plain": [ 1767 | " btc mlp lstm garch evtgarch lpa carl \\\n", 1768 | "0.010 24 1.0e-02 4.9e-03 5.1e-03 2.3e-03 1.6e-03 6.5e-03 -1.5e-03 \n", 1769 | " 2880 1.0e-02 8.0e-03 5.5e-03 1.0e-02 4.8e-03 8.6e-03 1.7e-02 \n", 1770 | " 4320 1.0e-02 8.1e-03 1.1e-02 8.8e-03 7.0e-03 9.5e-03 1.2e-02 \n", 1771 | "0.025 24 1.0e-02 7.3e-03 7.1e-03 3.4e-03 6.3e-03 8.9e-03 -6.7e-04 \n", 1772 | " 2880 1.0e-02 6.9e-03 9.9e-03 3.6e-03 1.4e-03 2.2e-03 1.2e-02 \n", 1773 | " 4320 1.0e-02 5.8e-03 1.0e-02 5.3e-03 4.5e-03 6.3e-03 1.2e-02 \n", 1774 | "0.050 24 1.0e-02 7.2e-03 9.2e-03 5.8e-03 6.7e-03 9.3e-03 2.9e-03 \n", 1775 | " 2880 1.0e-02 6.8e-03 1.3e-02 7.9e-04 1.4e-03 2.7e-03 1.3e-02 \n", 1776 | " 4320 1.0e-02 8.5e-03 1.2e-02 1.5e-03 2.1e-03 5.1e-03 1.6e-02 \n", 1777 | "0.100 24 1.0e-02 5.7e-03 9.6e-03 5.5e-03 6.1e-03 2.8e-03 6.9e-03 \n", 1778 | " 2880 1.0e-02 1.0e-02 7.5e-03 1.6e-03 1.8e-03 5.5e-03 1.4e-02 \n", 1779 | " 4320 1.0e-02 1.4e-02 9.8e-03 3.8e-03 4.1e-03 5.5e-03 1.2e-02 \n", 1780 | "\n", 1781 | " ensemble var_norm var_evt varspread \n", 1782 | "0.010 24 8.9e-03 6.5e-03 6.3e-03 4.0e-03 \n", 1783 | " 2880 4.4e-03 7.1e-03 6.3e-03 4.0e-03 \n", 1784 | " 4320 1.4e-02 8.2e-03 7.1e-03 4.0e-03 \n", 1785 | "0.025 24 6.8e-03 7.0e-03 7.5e-03 3.7e-03 \n", 1786 | " 2880 8.5e-03 6.4e-03 6.3e-03 3.7e-03 \n", 1787 | " 4320 8.3e-03 7.4e-03 7.5e-03 3.7e-03 \n", 1788 | "0.050 24 7.1e-03 7.8e-03 8.3e-03 2.7e-03 \n", 1789 | " 2880 8.7e-03 5.9e-03 6.2e-03 2.7e-03 \n", 1790 | " 4320 1.1e-02 6.5e-03 7.2e-03 2.7e-03 \n", 1791 | "0.100 24 6.9e-03 5.9e-03 7.8e-03 2.4e-03 \n", 1792 | " 2880 6.8e-03 5.5e-03 6.3e-03 2.4e-03 \n", 1793 | " 4320 8.1e-03 5.8e-03 7.3e-03 2.4e-03 " 1794 | ] 1795 | }, 1796 | "execution_count": 8, 1797 | "metadata": {}, 1798 | "output_type": "execute_result" 1799 | } 1800 | ], 1801 | "source": [ 1802 | "stat = table[order_cols].loc[:, idx[:,'ret']].droplevel(1,1) * 100\n", 1803 | "for c in stat.columns:\n", 1804 | " stat[c] = stat[c].map('{:.1e}'.format)\n", 1805 | "if save:\n", 1806 | " stat.to_csv('avg_return.csv')\n", 1807 | "stat" 1808 | ] 1809 | }, 1810 | { 1811 | "cell_type": "code", 1812 | "execution_count": null, 1813 | "metadata": {}, 1814 | "outputs": [], 1815 | "source": [] 1816 | } 1817 | ], 1818 | "metadata": { 1819 | "kernelspec": { 1820 | "display_name": "mlvsgarch", 1821 | "language": "python", 1822 | "name": "mlvsgarch" 1823 | }, 1824 | "language_info": { 1825 | "codemirror_mode": { 1826 | "name": "ipython", 1827 | "version": 3 1828 | }, 1829 | "file_extension": ".py", 1830 | "mimetype": "text/x-python", 1831 | "name": "python", 1832 | "nbconvert_exporter": "python", 1833 | "pygments_lexer": "ipython3", 1834 | "version": "3.6.7" 1835 | } 1836 | }, 1837 | "nbformat": 4, 1838 | "nbformat_minor": 4 1839 | } 1840 | -------------------------------------------------------------------------------- /Table 7 Backtest performance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "%matplotlib inline\n", 13 | "import pickle\n", 14 | "\n", 15 | "save = False\n", 16 | "idx = pd.IndexSlice\n", 17 | "\n", 18 | "# Get result table\n", 19 | "compdf = pickle.load(open('hist_performance.p', 'rb'))\n", 20 | "\n", 21 | "# table report\n", 22 | "table = pd.read_pickle('./result/final_table.p')\n", 23 | 
"table.loc[:,idx[['btc', 'ensemble', 'switch', 'var_evt', 'var_norm', 'varspread'], 'total_ret']]\n", 24 | "index_model = list(table.columns.levels)[0]\n", 25 | "quantiles = list(table.index.levels)[0]\n", 26 | "window = list(table.index.levels)[1]\n", 27 | "\n", 28 | "# change index\n", 29 | "newtable = pd.DataFrame(index =pd.MultiIndex.from_product([quantiles, window, index_model]), columns = list(table.columns.levels)[1])\n", 30 | "qws = table.index\n", 31 | "for qw in qws:\n", 32 | " for m in index_model:\n", 33 | " newtable.loc[(qw[0], qw[1], m)] = table.loc[qw, idx[m,:]][m]\n", 34 | "\n", 35 | "to_save = newtable.loc[idx[:,:,['btc', 'ensemble', 'var_norm', 'var_evt', 'varspread', 'switch']], ['total_ret', 'sr', 'sortino', 'mdd', 'VaR', 'vol']]\n", 36 | "to_save['total_ret'] = (to_save['total_ret'] + 1) ** (24*365/len(compdf)) - 1\n", 37 | "to_save.loc[:, ['total_ret', 'mdd', 'VaR', 'vol']] = to_save[['total_ret', 'mdd', 'VaR', 'vol']] * 100\n", 38 | "to_save = to_save.astype('float')\n", 39 | "to_save.loc[:, ['VaR', 'vol']] = to_save.loc[:, ['VaR', 'vol']].round(2)\n", 40 | "to_save.loc[:, ['sr', 'sortino']] = to_save.loc[:, ['sr', 'sortino']].round(2)\n", 41 | "to_save.loc[:, ['total_ret', 'mdd']] = to_save.loc[:, ['total_ret', 'mdd']].round(0).astype(int)\n", 42 | "to_save = to_save.loc[idx[:,:,['ensemble', 'var_norm', 'var_evt', 'varspread', 'switch']],:]\n", 43 | "if save:\n", 44 | " to_save.to_csv('final_back_strat_perf.csv')" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/html": [ 55 | "
\n", 56 | "\n", 69 | "\n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " 
\n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 
617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | "
total_retsrsortinomddVaRvol
0.01024ensemble76.01.522.5231.01.690.54
switch59.01.572.2930.01.210.41
var_evt54.01.282.1333.01.390.46
var_norm55.01.242.1040.01.460.49
varspread34.01.141.5317.00.950.33
2880ensemble38.00.701.4453.01.850.60
switch63.01.502.3627.01.400.46
var_evt54.01.162.1545.01.550.50
var_norm61.01.322.3740.01.570.50
varspread34.01.141.5317.00.950.33
4320ensemble115.01.973.9143.01.920.64
switch111.02.393.9523.01.520.51
var_evt60.01.312.3743.01.520.50
var_norm71.01.542.7539.01.520.50
varspread34.01.141.5317.00.950.33
0.02524ensemble58.01.141.7136.01.030.55
switch47.01.271.9027.00.740.41
var_evt64.01.402.3742.00.950.50
var_norm60.01.322.2142.00.950.50
varspread32.01.352.2318.00.420.26
2880ensemble73.01.342.7155.01.050.59
switch85.02.054.1532.00.800.46
var_evt54.01.192.2145.00.930.50
var_norm55.01.232.3045.00.890.49
varspread32.01.352.2318.00.420.26
4320ensemble71.01.262.5445.01.090.62
switch65.01.502.9130.00.830.47
var_evt64.01.432.5541.00.910.49
var_norm63.01.422.5541.00.900.49
varspread32.01.352.2318.00.420.26
0.05024ensemble61.01.251.8639.00.610.53
switch64.01.642.6929.00.460.43
var_evt71.01.582.8239.00.610.49
var_norm66.01.532.6734.00.580.47
varspread23.00.701.3327.00.370.36
2880ensemble75.01.382.6054.00.710.59
switch101.02.235.0827.00.580.50
var_evt54.01.182.1945.00.620.49
var_norm50.01.172.2044.00.580.47
varspread23.00.701.3327.00.370.36
4320ensemble92.01.682.9249.00.730.61
switch108.02.385.0923.00.580.50
var_evt61.01.382.4842.00.610.49
var_norm56.01.292.3942.00.580.47
varspread23.00.701.3327.00.370.36
0.10024ensemble59.01.071.9455.00.470.61
switch73.01.703.0326.00.310.47
var_evt66.01.432.6542.00.390.51
var_norm51.01.222.0139.00.320.45
varspread21.00.741.1817.00.070.30
2880ensemble58.01.071.9552.00.460.60
switch71.01.693.3327.00.330.46
var_evt54.01.212.2944.00.400.49
var_norm47.01.182.1640.00.350.44
varspread21.00.741.1817.00.070.30
4320ensemble70.01.322.3353.00.450.58
switch97.02.274.5023.00.330.47
var_evt62.01.412.5740.00.400.48
var_norm50.01.252.2844.00.360.43
varspread21.00.741.1817.00.070.30
\n", 642 | "
" 643 | ], 644 | "text/plain": [ 645 | " total_ret sr sortino mdd VaR vol\n", 646 | "0.010 24 ensemble 76.0 1.52 2.52 31.0 1.69 0.54\n", 647 | " switch 59.0 1.57 2.29 30.0 1.21 0.41\n", 648 | " var_evt 54.0 1.28 2.13 33.0 1.39 0.46\n", 649 | " var_norm 55.0 1.24 2.10 40.0 1.46 0.49\n", 650 | " varspread 34.0 1.14 1.53 17.0 0.95 0.33\n", 651 | " 2880 ensemble 38.0 0.70 1.44 53.0 1.85 0.60\n", 652 | " switch 63.0 1.50 2.36 27.0 1.40 0.46\n", 653 | " var_evt 54.0 1.16 2.15 45.0 1.55 0.50\n", 654 | " var_norm 61.0 1.32 2.37 40.0 1.57 0.50\n", 655 | " varspread 34.0 1.14 1.53 17.0 0.95 0.33\n", 656 | " 4320 ensemble 115.0 1.97 3.91 43.0 1.92 0.64\n", 657 | " switch 111.0 2.39 3.95 23.0 1.52 0.51\n", 658 | " var_evt 60.0 1.31 2.37 43.0 1.52 0.50\n", 659 | " var_norm 71.0 1.54 2.75 39.0 1.52 0.50\n", 660 | " varspread 34.0 1.14 1.53 17.0 0.95 0.33\n", 661 | "0.025 24 ensemble 58.0 1.14 1.71 36.0 1.03 0.55\n", 662 | " switch 47.0 1.27 1.90 27.0 0.74 0.41\n", 663 | " var_evt 64.0 1.40 2.37 42.0 0.95 0.50\n", 664 | " var_norm 60.0 1.32 2.21 42.0 0.95 0.50\n", 665 | " varspread 32.0 1.35 2.23 18.0 0.42 0.26\n", 666 | " 2880 ensemble 73.0 1.34 2.71 55.0 1.05 0.59\n", 667 | " switch 85.0 2.05 4.15 32.0 0.80 0.46\n", 668 | " var_evt 54.0 1.19 2.21 45.0 0.93 0.50\n", 669 | " var_norm 55.0 1.23 2.30 45.0 0.89 0.49\n", 670 | " varspread 32.0 1.35 2.23 18.0 0.42 0.26\n", 671 | " 4320 ensemble 71.0 1.26 2.54 45.0 1.09 0.62\n", 672 | " switch 65.0 1.50 2.91 30.0 0.83 0.47\n", 673 | " var_evt 64.0 1.43 2.55 41.0 0.91 0.49\n", 674 | " var_norm 63.0 1.42 2.55 41.0 0.90 0.49\n", 675 | " varspread 32.0 1.35 2.23 18.0 0.42 0.26\n", 676 | "0.050 24 ensemble 61.0 1.25 1.86 39.0 0.61 0.53\n", 677 | " switch 64.0 1.64 2.69 29.0 0.46 0.43\n", 678 | " var_evt 71.0 1.58 2.82 39.0 0.61 0.49\n", 679 | " var_norm 66.0 1.53 2.67 34.0 0.58 0.47\n", 680 | " varspread 23.0 0.70 1.33 27.0 0.37 0.36\n", 681 | " 2880 ensemble 75.0 1.38 2.60 54.0 0.71 0.59\n", 682 | " switch 101.0 2.23 5.08 27.0 0.58 0.50\n", 683 | " var_evt 54.0 1.18 2.19 45.0 0.62 0.49\n", 684 | " var_norm 50.0 1.17 2.20 44.0 0.58 0.47\n", 685 | " varspread 23.0 0.70 1.33 27.0 0.37 0.36\n", 686 | " 4320 ensemble 92.0 1.68 2.92 49.0 0.73 0.61\n", 687 | " switch 108.0 2.38 5.09 23.0 0.58 0.50\n", 688 | " var_evt 61.0 1.38 2.48 42.0 0.61 0.49\n", 689 | " var_norm 56.0 1.29 2.39 42.0 0.58 0.47\n", 690 | " varspread 23.0 0.70 1.33 27.0 0.37 0.36\n", 691 | "0.100 24 ensemble 59.0 1.07 1.94 55.0 0.47 0.61\n", 692 | " switch 73.0 1.70 3.03 26.0 0.31 0.47\n", 693 | " var_evt 66.0 1.43 2.65 42.0 0.39 0.51\n", 694 | " var_norm 51.0 1.22 2.01 39.0 0.32 0.45\n", 695 | " varspread 21.0 0.74 1.18 17.0 0.07 0.30\n", 696 | " 2880 ensemble 58.0 1.07 1.95 52.0 0.46 0.60\n", 697 | " switch 71.0 1.69 3.33 27.0 0.33 0.46\n", 698 | " var_evt 54.0 1.21 2.29 44.0 0.40 0.49\n", 699 | " var_norm 47.0 1.18 2.16 40.0 0.35 0.44\n", 700 | " varspread 21.0 0.74 1.18 17.0 0.07 0.30\n", 701 | " 4320 ensemble 70.0 1.32 2.33 53.0 0.45 0.58\n", 702 | " switch 97.0 2.27 4.50 23.0 0.33 0.47\n", 703 | " var_evt 62.0 1.41 2.57 40.0 0.40 0.48\n", 704 | " var_norm 50.0 1.25 2.28 44.0 0.36 0.43\n", 705 | " varspread 21.0 0.74 1.18 17.0 0.07 0.30" 706 | ] 707 | }, 708 | "execution_count": 2, 709 | "metadata": {}, 710 | "output_type": "execute_result" 711 | } 712 | ], 713 | "source": [ 714 | "to_save" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": {}, 721 | "outputs": [], 722 | "source": [] 723 | } 724 | ], 725 | "metadata": { 726 | "kernelspec": { 727 | 
"display_name": "mlvsgarch", 728 | "language": "python", 729 | "name": "mlvsgarch" 730 | }, 731 | "language_info": { 732 | "codemirror_mode": { 733 | "name": "ipython", 734 | "version": 3 735 | }, 736 | "file_extension": ".py", 737 | "mimetype": "text/x-python", 738 | "name": "python", 739 | "nbconvert_exporter": "python", 740 | "pygments_lexer": "ipython3", 741 | "version": "3.6.7" 742 | } 743 | }, 744 | "nbformat": 4, 745 | "nbformat_minor": 4 746 | } 747 | -------------------------------------------------------------------------------- /constant.py: -------------------------------------------------------------------------------- 1 | 2 | QS_NAME = {'01': 0.01, '025': 0.025, '05': 0.05, '10': 0.10} 3 | W_NAME = {'woneday': 24, 'w4months': 2880, 'w6months': 4032} -------------------------------------------------------------------------------- /hist_performance.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/hist_performance.p -------------------------------------------------------------------------------- /min_tpr_exceedance_alpha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/min_tpr_exceedance_alpha.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.17.2 2 | matplotlib==3.1.1 3 | pandas==0.25.1 4 | scikit-learn==0.21.3 5 | tensorflow==1.15.2 6 | keras==2.2.5 -------------------------------------------------------------------------------- /result/compare.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn import metrics 4 | from result.utils import get_col_from_wq, get_daily_df 5 | 6 | from sklearn.preprocessing import MinMaxScaler 7 | from sklearn.metrics import auc 8 | 9 | WINDOWS = {'woneday': 24, 'wonemonth': 24 * 30, 'w4months': 24 * 30 * 4, 'w6months': 24 * 30 * 6, 'woneyear': 24 * 365} 10 | 11 | 12 | def qs_from_name(qs_name): 13 | qs = {} 14 | for k in qs_name: 15 | qs[k] = float('0.' 
+ k) 16 | 17 | return qs 18 | 19 | 20 | def min_tpr_from_exceedance(exceedance, alpha): 21 | if exceedance == 0: 22 | return 0 23 | else: 24 | return (exceedance - alpha) / exceedance 25 | 26 | 27 | def min_tpr_from_target(target, alpha): 28 | alpha_bar = sum(target == 1) / len(target) 29 | return min_tpr_from_exceedance(alpha_bar, alpha) 30 | 31 | 32 | def return_fpr_tpr(target, pred, c01, c11): 33 | """ 34 | One point of the cost-weighted ROC curve. 35 | :param target: array of true labels (1 = tail event) 36 | :param pred: array of predicted labels 37 | :param c01: unit cost of a false positive (average return given up by exiting in a calm period) 38 | :param c11: unit reward of a true positive (average loss avoided by exiting in a tail period) 39 | :return: (p0 * c01 * FPR, p1 * c11 * TPR), the prior- and cost-weighted FPR and TPR 40 | """ 41 | cm = metrics.confusion_matrix(target, pred) 42 | class0 = sum(cm[0, :]) 43 | class1 = sum(cm[1, :]) 44 | 45 | p0 = class0 / (class0 + class1) 46 | p1 = 1 - p0 47 | FPR = cm[0, 1] / class0 48 | TPR = cm[1, 1] / class1 49 | 50 | return p0 * c01 * FPR, p1 * c11 * TPR 51 | 52 | 53 | def get_risk_adjusted_roc(target, pred, returns): 54 | # class-conditional costs: c01 prices a false alarm, c11 prices a correctly predicted tail event 55 | c01 = 1.0 * np.mean(returns.loc[target == 0]) 56 | c11 = -1.0 * np.mean(returns.loc[target == 1]) 57 | threshold = np.linspace(0, 1, 100) 58 | return_roc = np.array([[t, return_fpr_tpr(target, (pred >= t).astype(int), c01, c11)] for t in threshold]) 59 | return_roc = np.array([[a[0], a[1][0], a[1][1]] for a in return_roc]) 60 | 61 | return return_roc[:, 0], return_roc[:, 1], return_roc[:, 2] 62 | 63 | 64 | def get_adjusted_auc(target, pred, returns): 65 | t, fpr, tpr = get_risk_adjusted_roc(target, pred, returns) 66 | scaled = np.zeros((fpr.shape[0], 2)) 67 | scaled[:, 0] = fpr 68 | scaled[:, 1] = tpr 69 | scaler = MinMaxScaler(feature_range=(0, 1)) 70 | scaled = scaler.fit_transform(scaled) 71 | return auc(scaled[:, 0], scaled[:, 1]) 72 | 73 | 74 | def sharpe_ratio(returns, period=1, benchmark=0): 75 | return (returns - benchmark).mean() / (returns - benchmark).std() * np.sqrt(period) 76 | 77 | 78 | def get_daily_finalcomp(finalcomp): 79 | c = list(filter(lambda x: 'ret' in x, finalcomp.columns)) 80 | daily_data = finalcomp[c] 81 | daily_data = daily_data.cumsum() + 1 82 | daily_data = daily_data.loc[pd.date_range(daily_data.index[0], daily_data.index[-1], freq='D')] 83 | daily_data = daily_data.pct_change().dropna() 84 | 85 | return daily_data 86 | 87 | 88 | def get_mdd(K): 89 | dd = K / K.cummax() - 1.0 90 | mdd = dd.cummin() 91 | mdd = abs(min(mdd)) 92 | return mdd 93 | 94 | 95 | def sortino_ratio(returns, period=1): 96 | # Create a downside return column with the negative returns only 97 | downside_returns = returns.loc[returns < 0] 98 | # Calculate expected return and std dev of downside 99 | expected_return = returns.mean() 100 | down_stdev = downside_returns.std() 101 | 102 | return expected_return / down_stdev * np.sqrt(period) 103 | 104 | 105 | def get_table_report(finalcomp, qs_name, ws_name, mlp=False, varspread=False, carl=False, lpa=False, ensemble=False, 106 | var_norm=False, var_evt=False, switch=False): 107 | qs = qs_from_name(qs_name) 108 | ws = WINDOWS 109 | # Get daily data 110 | daily_data = get_daily_df(finalcomp) 111 | 112 | # Build final performance table 113 | index = pd.MultiIndex.from_product([qs_name, ws_name], names=['alpha', 'window']) 114 | model_cols = ['btc', 'lstm', 'garch', 'evtgarch'] 115 | 116 | if varspread: 117 | model_cols = model_cols + ['varspread'] 118 | if carl: 119 | model_cols = model_cols + ['carl'] 120 | if lpa: 121 | model_cols = model_cols + ['lpa'] 122 | if ensemble: 123 | model_cols = model_cols + ['ensemble'] 124 | if mlp: 125 | model_cols = model_cols + ['mlp'] 126 | if var_norm: 127 | model_cols = model_cols + ['var_norm'] 128 | if var_evt: 129 | 
def get_table_report(finalcomp, qs_name, ws_name, mlp=False, varspread=False, carl=False, lpa=False,
                     ensemble=False, var_norm=False, var_evt=False, switch=False):
    """Build the backtest performance table.

    :param finalcomp: DataFrame with hourly returns, predictions and probabilities
    :param qs_name: alpha levels as strings, e.g. ['01', '025', '05', '1']
    :param ws_name: window names, e.g. ['24', '2880', '4320']
    The boolean flags activate the corresponding optional models.
    """
    qs = qs_from_name(qs_name)
    ws = WINDOWS  # kept for parity with the original interface; unused below
    # Get daily data for the Sortino ratio
    daily_data = get_daily_df(finalcomp)

    # Optional models, in the column order of the final table
    flags = [('varspread', varspread), ('carl', carl), ('lpa', lpa), ('ensemble', ensemble),
             ('mlp', mlp), ('var_norm', var_norm), ('var_evt', var_evt), ('switch', switch)]
    model_cols = ['btc', 'lstm', 'garch', 'evtgarch'] + [m for m, on in flags if on]

    # Suffix used by get_col_from_wq for each model's columns
    suffix = {'garch': 'norm', 'evtgarch': 'evt'}

    def key(m):
        return suffix.get(m, m)

    # Strategies with a return series (everything except buy-and-hold btc)
    strat_models = [m for m in model_cols if m != 'btc']
    # Strategies with a binary prediction (confusion-matrix metrics)
    pred_models = [m for m in strat_models if m not in ('var_norm', 'var_evt', 'switch')]
    # Strategies with a predicted probability (scoring rules and AUC)
    proba_models = [m for m in pred_models if m != 'varspread']

    # Build final performance table
    index = pd.MultiIndex.from_product([qs_name, ws_name], names=['alpha', 'window'])
    columns = pd.MultiIndex.from_product([model_cols,
                                          ['ret', 'sr', 'exceedance', 'VaR', 'status', 'mdd', 'sortino']])
    table = pd.DataFrame(index=index, columns=columns)

    print('Performance and classification metrics')
    for qw in table.index:
        c = get_col_from_wq(qw[1], qw[0])
        q = qs[qw[0]]
        var_btc = np.quantile(finalcomp['returns'], q)

        rets = {'btc': finalcomp['returns']}
        rets.update({m: finalcomp[c['ret_' + key(m)]] for m in strat_models})

        # Performance metrics, computed identically for every return series
        for m, r in rets.items():
            table.loc[qw, (m, 'total_ret')] = np.cumsum(r).values[-1]
            table.loc[qw, (m, 'ret')] = np.mean(r)
            table.loc[qw, (m, 'vol')] = np.std(r)
            table.loc[qw, (m, 'sr')] = sharpe_ratio(r, period=365 * 24)
            table.loc[qw, (m, 'excess_sr')] = sharpe_ratio(
                r, period=365 * 24, benchmark=0 if m == 'btc' else finalcomp['returns'])
            table.loc[qw, (m, 'VaR')] = -np.quantile(r, q)
            # status: does the strategy improve on the buy-and-hold VaR?
            # (trivially False for btc itself, which is compared to its own VaR)
            table.loc[qw, (m, 'status')] = np.quantile(r, q) > var_btc
            table.loc[qw, (m, 'mdd')] = get_mdd((r + 1).cumprod())
            daily = daily_data['returns'] if m == 'btc' else daily_data[c['ret_' + key(m)]]
            table.loc[qw, (m, 'sortino')] = sortino_ratio(daily, period=365)

        # Average return conditional on the realized (btc) or predicted class;
        # for strategies the sign flips on predicted drops (the exit leg)
        for class_ in [0, 1]:
            table.loc[qw, ('btc', 'ret_%s' % class_)] = np.mean(
                finalcomp.loc[finalcomp[c['drop']] == class_, 'returns'])
            for m in proba_models:
                table.loc[qw, (m, 'ret_%s' % class_)] = (-1.0) ** (class_ == 1) * np.mean(
                    finalcomp.loc[finalcomp[c['pred_' + key(m)]] == class_, 'returns'])

        # Exceedance: share of hours with an unpredicted extreme drop
        table.loc[qw, ('btc', 'exceedance')] = finalcomp[c['drop']].sum() / len(finalcomp)
        for m in pred_models:
            cm = metrics.confusion_matrix(finalcomp[c['drop']], finalcomp[c['pred_' + key(m)]])
            table.loc[qw, (m, 'exceedance')] = cm[1, 0] / np.sum(cm)
        for m in [m for m in ('var_norm', 'var_evt', 'switch') if m in model_cols]:
            # VaR-based strategies: count returns below the lower VaR bound
            table.loc[qw, (m, 'exceedance')] = np.sum(
                finalcomp[c['ret_' + m]] < finalcomp[c['lower']]) / len(finalcomp)

        # Classification metrics
        table.loc[qw, ('btc', 'tpr')] = min_tpr_from_target(finalcomp[c['drop']], q)
        for m in pred_models:
            cm = metrics.confusion_matrix(finalcomp[c['drop']], finalcomp[c['pred_' + key(m)]])
            table.loc[qw, (m, 'tpr')] = cm[1, 1] / cm.sum(1)[-1]
            table.loc[qw, (m, 'tnr')] = cm[0, 0] / cm.sum(1)[0]
            table.loc[qw, (m, 'fscore')] = metrics.f1_score(finalcomp[c['drop']],
                                                            finalcomp[c['pred_' + key(m)]])
        for m in proba_models:
            proba = finalcomp[c['proba_' + key(m)]]
            table.loc[qw, (m, 'brier')] = metrics.brier_score_loss(finalcomp[c['drop']], proba)
            table.loc[qw, (m, 'log_loss')] = metrics.log_loss(finalcomp[c['drop']], proba)
            table.loc[qw, (m, 'auc')] = metrics.roc_auc_score(finalcomp[c['drop']], proba)
            table.loc[qw, (m, 'risk_auc')] = get_adjusted_auc(finalcomp[c['drop']], proba,
                                                              finalcomp['returns'])

    # Respected constraint: the strategy TPR must reach the minimum TPR
    # implied by the target exceedance level
    print('Respected constraint')
    for m in pred_models:
        table[(m, 'respected_cstr')] = table.loc[:, 'btc']['tpr'] <= table.loc[:, m]['tpr']

    # Rearrange columns order
    table.sort_index(axis=1, level=0, inplace=True)

    new_ind = pd.MultiIndex.from_product([[0.01, 0.025, 0.05, 0.1], [24, 2880, 4320]])
    table.index = new_ind

    return table
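
# ---------------------------------------------------------------------------
# Hypothetical usage sketch, not in the original file: how the report table
# might be assembled. The pickle name 'final_comparison.p' is an assumption;
# the alpha and window names match the index hard-coded at the end of
# get_table_report.
if __name__ == '__main__':
    finalcomp = pd.read_pickle('final_comparison.p')  # hourly returns, preds, probas
    report = get_table_report(finalcomp,
                              qs_name=['01', '025', '05', '1'],
                              ws_name=['24', '2880', '4320'],
                              mlp=True, varspread=True, ensemble=True, switch=True)
    print(report[('lstm', 'sr')])  # annualized Sharpe ratios of the LSTM strategy
# ---------------------------------------------------------------------------
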
--------------------------------------------------------------------------------
/result/cvdates.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/result/cvdates.p
--------------------------------------------------------------------------------
/result/final_table.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/result/final_table.p
--------------------------------------------------------------------------------
/result/utils.py:
--------------------------------------------------------------------------------
import pandas as pd


def get_boxdata(cvtable, metric, classifiers, qw=(0.01, 24)):
    # Collect one metric across cross-validation folds for each classifier
    boxdata = []
    for m in classifiers:
        mdata = []
        for k in cvtable:
            mdata.append(cvtable[k][m][metric].loc[qw])
        boxdata.append(mdata)
    return boxdata


def get_daily_df(finalcomp):
    # Resample hourly strategy returns to daily frequency
    c = list(filter(lambda x: 'ret' in x, finalcomp.columns))
    daily_data = finalcomp[c]
    daily_data = daily_data.cumsum() + 1
    daily_data = daily_data.loc[pd.date_range(daily_data.index[0], daily_data.index[-1], freq='D')]
    daily_data = daily_data.pct_change().dropna()

    return daily_data


def get_col_from_wq(w, q):
    # Map a (window, alpha) pair to the flat column names of the comparison
    # DataFrame. Note two quirks: the LPA columns carry a 'lpa_norm' suffix
    # and the varspread prediction is stored under 'label_varspread'.
    columns = {'drop': 'drop_%s_0.%s' % (w, q),
               'lower': 'lower_%s_0.%s' % (w, q),
               'proba_dl': 'proba_dl_%s_0.%s' % (w, q),
               'proba_lstm': 'proba_lstm_%s_0.%s' % (w, q),
               'proba_mlp': 'proba_mlp_%s_0.%s' % (w, q),
               'proba_norm': 'proba_norm_%s_0.%s' % (w, q),
               'proba_evt': 'proba_evt_%s_0.%s' % (w, q),
               'proba_carl': 'proba_carl_%s_0.%s' % (w, q),
               'proba_lpa': 'proba_lpa_norm_%s_0.%s' % (w, q),
               'proba_ensemble': 'proba_ensemble_%s_0.%s' % (w, q),
               'ret_dl': 'ret_dl_%s_0.%s' % (w, q),
               'ret_lstm': 'ret_lstm_%s_0.%s' % (w, q),
               'ret_mlp': 'ret_mlp_%s_0.%s' % (w, q),
               'ret_norm': 'ret_norm_%s_0.%s' % (w, q),
               'ret_evt': 'ret_evt_%s_0.%s' % (w, q),
               'ret_varspread': 'ret_varspread_%s_0.%s' % (w, q),
               'ret_carl': 'ret_carl_%s_0.%s' % (w, q),
               'ret_lpa': 'ret_lpa_norm_%s_0.%s' % (w, q),
               'ret_ensemble': 'ret_ensemble_%s_0.%s' % (w, q),
               'ret_var_norm': 'ret_var_norm_%s_0.%s' % (w, q),
               'ret_var_evt': 'ret_var_evt_%s_0.%s' % (w, q),
               'ret_switch': 'ret_switch_%s_0.%s' % (w, q),
               'pred_dl': 'pred_dl_%s_0.%s' % (w, q),
               'pred_lstm': 'pred_lstm_%s_0.%s' % (w, q),
               'pred_mlp': 'pred_mlp_%s_0.%s' % (w, q),
               'pred_norm': 'pred_norm_%s_0.%s' % (w, q),
               'pred_evt': 'pred_evt_%s_0.%s' % (w, q),
               'pred_varspread': 'label_varspread_%s_0.%s' % (w, q),
               'pred_carl': 'pred_carl_%s_0.%s' % (w, q),
               'pred_lpa': 'pred_lpa_norm_%s_0.%s' % (w, q),
               'pred_ensemble': 'pred_ensemble_%s_0.%s' % (w, q)
               }
    return columns


def qs_from_name(qs_name):
    # Convert alpha names such as '01' into the corresponding floats (0.01)
    qs = {}
    for k in qs_name:
        qs[k] = float('0.' + k)

    return qs
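
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original file: resolving the flat
# column names for one (window, alpha) pair. The window 24 and the alpha
# name '01' are the values used elsewhere in this repository.
if __name__ == '__main__':
    cols = get_col_from_wq(w=24, q='01')
    print(cols['ret_lstm'])   # -> 'ret_lstm_24_0.01'
    print(cols['pred_evt'])   # -> 'pred_evt_24_0.01'
    print(qs_from_name(['01', '025', '05', '1']))
    # -> {'01': 0.01, '025': 0.025, '05': 0.05, '1': 0.1}
# ---------------------------------------------------------------------------
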
--------------------------------------------------------------------------------
/strat_excess_perf_downtrend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/strat_excess_perf_downtrend.png
--------------------------------------------------------------------------------
/strat_excess_perf_uptrend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/strat_excess_perf_uptrend.png
--------------------------------------------------------------------------------
/strat_historical_perf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/strat_historical_perf.png
--------------------------------------------------------------------------------
/strat_switch_ens_varspread.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/strat_switch_ens_varspread.png
--------------------------------------------------------------------------------
/tvar_0.01_w.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantLet/MLvsGARCH/b90559543de02145f92abab74ab6cae2441c6bc7/tvar_0.01_w.png
--------------------------------------------------------------------------------