├── Images
│   └── LSTM-MSNet-Framework.PNG
├── README.md
├── docs
│   └── bandara2019LSTM-MsNet.pdf
└── src
    ├── Benchmarks
    │   ├── DynamicHaromicRegressionArima.R
    │   ├── DynamicHaromicRegressiontslm.R
    │   ├── FFORMA.R
    │   ├── Prophet.R
    │   └── TBATS.R
    ├── LSTM-Models
    │   ├── configs
    │   │   ├── global_config.py
    │   │   └── initial_hyperparameter_values
    │   │       └── energy_hourly
    │   ├── error_calculator
    │   │   └── moving_window
    │   │       ├── .Rhistory
    │   │       ├── energy_DS_evaluation.R
    │   │       └── energy_SE_evaluation.R
    │   ├── external_packages
    │   │   └── cocob_optimizer
    │   │       ├── __init__.py
    │   │       ├── __pycache__
    │   │       │   ├── __init__.cpython-36.pyc
    │   │       │   └── cocob_optimizer.cpython-36.pyc
    │   │       └── cocob_optimizer.py
    │   ├── generic_model_energy_DS_test.py
    │   ├── generic_model_energy_DS_trainer.py
    │   ├── generic_model_energy_SE_test.py
    │   ├── generic_model_energy_SE_trainer.py
    │   ├── preprocess_scripts
    │   │   ├── Baseline
    │   │   │   └── energy_create_tfrecords_baseline.py
    │   │   ├── DS
    │   │   │   ├── MSTL-7-DS
    │   │   │   │   └── energy_create_tfrecords_mean_hourly_mstl7.py
    │   │   │   ├── MSTL-DS
    │   │   │   │   └── energy_create_tfrecords_mean_hourly_mstl.py
    │   │   │   ├── Prophet-DS
    │   │   │   │   └── energy_create_tfrecords_mean_hourly_prophet.py
    │   │   │   └── TBATS-DS
    │   │   │       └── energy_create_tfrecords_mean_hourly_tbats.py
    │   │   └── SE
    │   │       ├── Fourier-SE-K1
    │   │       │   └── energy_create_tfrecords_fourier.py
    │   │       ├── Fourier-SE
    │   │       │   └── energy_create_tfrecords_fourier.py
    │   │       ├── MSTL-7-SE
    │   │       │   └── energy_create_tfrecords_mstl7_feature.py
    │   │       ├── MSTL-SE
    │   │       │   └── energy_create_tfrecords_mstl_feature.py
    │   │       ├── Prophet-SE
    │   │       │   └── energy_create_tfrecords_prophet_feature.py
    │   │       └── TBATS-SE
    │   │           └── energy_create_tfrecords_tbats_feature.py
    │   ├── results
    │   │   ├── errors
    │   │   │   └── empty_commit.txt
    │   │   ├── optimized_configurations
    │   │   │   └── empty_commit.txt
    │   │   ├── processed_rnn_forecasts
    │   │   │   └── empty_commit.txt
    │   │   ├── rnn_forecasts
    │   │   │   └── empty_commit.txt
    │   │   └── validation_errors
    │   │       └── empty_commit.txt
    │   ├── rnn_architectures
    │   │   └── stacking_model
    │   │       ├── energy_stacking_model_DS_tester.py
    │   │       ├── energy_stacking_model_DS_trainer.py
    │   │       ├── energy_stacking_model_SE_tester.py
    │   │       └── energy_stacking_model_SE_trainer.py
    │   ├── tfrecords_handler
    │   │   └── moving_window
    │   │       ├── energy_tfrecord_DS_reader.py
    │   │       ├── energy_tfrecord_DS_writer.py
    │   │       ├── energy_tfrecord_SE_reader.py
    │   │       └── energy_tfrecord_SE_writer.py
    │   └── utility_scripts
    │       ├── hyperparameter_scripts
    │       │   ├── hyperparameter_config_reader.py
    │       │   └── hyperparameter_summary_generator.py
    │       ├── invoke_r_energy_DS.py
    │       ├── invoke_r_energy_SE.py
    │       └── persist_optimized_config_results.py
    └── LSTM-Preprocessing-Scripts
        ├── Baseline
        │   ├── Mean_Moving_window
        │   │   └── empty_commit.txt
        │   ├── energy_baseline_hourly_test.R
        │   └── energy_baseline_train_validation.R
        ├── DS
        │   ├── LSTM-MSTL-DS
        │   │   ├── Mean_Moving_window
        │   │   │   └── empty_commit.txt
        │   │   ├── energy_mstl_test.R
        │   │   └── energy_mstl_train_validation.R
        │   ├── LSTM-MSTL7-DS
        │   │   ├── Mean_Moving_window
        │   │   │   └── empty_commit.txt
        │   │   ├── energy_mstl7_test.R
        │   │   └── energy_mstl7_train_validation.R
        │   ├── LSTM-Prophet-DS
        │   │   ├── Mean_Moving_window
        │   │   │   └── empty_commit.txt
        │   │   ├── energy_prophet_test.R
        │   │   └── energy_prophet_train_validation.R
        │   └── LSTM-Tbats-DS
        │       ├── Mean_Moving_window
        │       │   └── empty_commit.txt
        │       ├── energy_tbats_test.R
        │       └── energy_tbats_train_validation.R
        └── SE
            ├── LSTM-Fourier-SE-1
            │   ├── Mean_Moving_window
            │   │   └── empty_commit.txt
            │   ├── energy_fourierk1_test.R
            │   └── energy_fourierk1_train_validation.R
            ├── LSTM-Fourier-SE
            │   ├── Mean_Moving_window
            │   │   └── empty_commit.txt
            │   ├── energy_fourier_test.R
            │   └── energy_fourier_train_validation.R
            ├── LSTM-MSTL-SE
            │   ├── Mean_Moving_window
            │   │   └── empty_commit.txt
            │   ├── energy_mstl_test.R
            │   └── energy_mstl_train_validation.R
            ├── LSTM-MSTL7-SE
            │   ├── Mean_Moving_window
            │   │   └── empty_commit.txt
            │   ├── energy_mstl7_test.R
            │   └── energy_mstl7_train_validation.R
            ├── LSTM-Prophet-SE
            │   ├── Mean_Moving_window
            │   │   └── empty_commit.txt
            │   ├── energy_prophet_test.R
            │   └── energy_prophet_train_validation.R
            └── LSTM-Tbats-SE
                ├── Mean_Moving_window
                │   └── empty_commit.txt
                ├── energy_tbats_test.R
                └── energy_tbats_train_validation.R

/Images/LSTM-MSNet-Framework.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/Images/LSTM-MSNet-Framework.PNG
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
LSTM-MSNet: Leveraging Forecasts on Sets of Related Time Series with Multiple Seasonal Patterns
===================

This page explains our **L**ong **S**hort-**T**erm **M**emory **M**ulti-**S**easonal **Net** (LSTM-MSNet) forecasting framework, which can be used to forecast sets of time series with multiple seasonal patterns.

In the description, we first provide a brief introduction to our methodology, and then explain the steps to follow to execute our code and use our framework in your research work.

# Methodology #

![LSTM-MSNet framework overview](Images/LSTM-MSNet-Framework.PNG)

The above figure gives an overview of the proposed LSTM-MSNet training paradigms. In the DS approach, deseasonalised time series are used to train the LSTM-MSNet. Here, a reseasonalisation phase is required, as the target MW patches are seasonally adjusted. In the SE approach, the seasonal values extracted in the deseasonalisation phase are employed as exogenous variables, along with the original time series, to train the LSTM-MSNet. Here, a reseasonalisation phase is not required, as the target MW patches contain the original distribution of the time series. A more detailed explanation of these training paradigms can be found in our [manuscript](https://arxiv.org/pdf/1909.04293.pdf).

We use the **DS** and **SE** naming conventions in our code repository to distinguish these two training paradigms. Please note that this repo contains separate preprocessing files for each of these training paradigms.

**NOTICE**: You may find duplicated code as a result of these two paradigms. However, we expect to refactor this code, making the type of training paradigm, i.e., DS or SE, a query parameter in our execution scripts. We also expect to migrate this source code to Tensorflow 2.0 soon.

# Usage #

## Software Requirements ##

| Software | Version |
| ------------- | ------------- |
| `Python` | `>=3.6` |
| `Tensorflow` | `1.12.0` |
| `smac` | `0.8.0` |

As illustrated in the above figure, the LSTM-MSNet framework consists of three main phases: i) the pre-processing phase, which uses state-of-the-art multi-seasonal decomposition techniques, i.e., *MSTL*, *Prophet*, and *Tbats*, to extract the seasonal components (additionally, for the **SE** approach, *Fourier terms* are used to represent the seasonal trajectories, in order to supplement the subsequent LSTM training phase); ii) the training phase, in which the LSTM-MSNet framework is trained; and iii) the post-processing phase, in which the forecasts are retransformed into the original scale.

## Path Variables ##

Set the `PYTHONPATH` env variable of the system. Append the absolute paths of both the project root directory and the `src/LSTM-Models/external_packages/cocob_optimizer` directory to the `PYTHONPATH`.
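
Both paths must be visible to Python before running any of the scripts. For example, on a Linux system (`<project_root>` below is a placeholder for wherever this repository was cloned):

`export PYTHONPATH=$PYTHONPATH:<project_root>:<project_root>/src/LSTM-Models/external_packages/cocob_optimizer`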

## Preprocessing the Data ##

### Generating Train, Validation, and Test Scripts ###

Three files need to be created for every model, one each for training, validation, and testing. For the R scripts (under `src/LSTM-Preprocessing-Scripts`), make sure to set the working directory to the project root folder. As an example, the *solar_train.txt* file is hardcoded in the scripts. The current source code supports comma-separated data input; however, this can easily be adjusted for other delimiters.

We assume *solar_train.txt* contains hourly energy consumption observations of multiple households. Each time series consists of 2 years of hourly data, and may exhibit three types of seasonality: *daily*, *weekly*, and *yearly*. As explained earlier, the **SE** and **DS** folders denote the two different paradigms, whereas the **Baseline** folder denotes a variant that does not use any paradigm when training the LSTM-MSNet.

### Generating TFrecords ###
When training the LSTM-MSNet, we use the tfrecords functionality provided by the Tensorflow API for faster execution of our models. The preprocessing scripts used to generate the tfrecords can be found in the `src/LSTM-Models/preprocess_scripts` directory. The `src/LSTM-Models/tfrecords_handler/moving_window` module converts the text data into tfrecords format (using `energy_tfrecord_DS_writer.py`/`energy_tfrecord_SE_writer.py`) as well as reads in tfrecord data (using `energy_tfrecord_DS_reader.py`/`energy_tfrecord_SE_reader.py`) during execution.

Sample record of a validation file in the moving window format:

`1|i -0.120404761911659 -0.138029849544217 -0.158262315258994 -0.117573911196581 -0.047514354625692 -0.054921000563831 -0.087502195404757 -0.0468948356427585 -0.0265522120279886 -0.0259454546421436 -0.0149743425531481 -0.0750882944138711 0.0300152959591582 0.037022158965339 0.0168685236725015 |o -0.0487566395401897 -0.00313169841363781 -0.0356365611845675 0.11323901494058 0.0498791083802557 -0.0222170965043569 0.0324163879281905 0.0353096916266837 0.085162256512592 0.0644041024983562 0.0970988030382305 0.100330957527596 |# 6.88534358640275 -0.00313977170055892 -0.0044384039516765 0.00223114486678285 0.00574405742601041 0.00832797755707766 0.00264786188838812 0.00821557645548867 0.0196038788714076 -0.0082329067304395 -0.0136679361428553 -0.00526828286265864 -0.0120231978314266`
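
Each record carries a series identifier, an input window (after `|i`), the corresponding output window (after `|o`), and normalisation metadata (after `|#`) that the post-processing scripts use to rescale the forecasts. A minimal sketch of how such a line could be parsed (the function name and return structure are illustrative, not part of this repository):

```
def parse_moving_window_record(line):
    # layout: "<series_id>|i <inputs> |o <outputs> |# <metadata>"
    series_id, rest = line.split("|i", 1)
    input_part, rest = rest.split("|o", 1)
    output_part, meta_part = rest.split("|#", 1)
    to_floats = lambda part: [float(v) for v in part.split()]
    return int(series_id), to_floats(input_part), to_floats(output_part), to_floats(meta_part)
```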

## Execution Instructions ##

Example bash script:

`python ./generic_model_energy_DS_trainer.py --dataset_name energy_ds --contain_zero_values 0 --initial_hyperparameter_values_file configs/initial_hyperparameter_values/energy_hourly --binary_train_file_train_mode datasets/binary_data/energy_ds/moving_window/energy_ds_30i24.tfrecords --binary_valid_file_train_mode datasets/binary_data/energy_ds/moving_window/energy_ds_30i24v.tfrecords --binary_train_file_test_mode datasets/binary_data/energy_ds/moving_window/energy_ds_12i15v.tfrecords --binary_test_file_test_mode datasets/binary_data/CIF_2016/moving_window/energy_dstest.tfrecords --txt_test_file datasets/text_data/CIF_2016/moving_window/energy_test.txt --actual_results_file datasets/text_data/CIF_2016/energy_results.txt --input_size 30 --forecast_horizon 24 --optimizer cocob --cell_type LSTM --hyperparameter_tuning smac --model_type stacking --input_format moving_window --seasonality_period 8766 --original_data_file datasets/text_data/CIF_2016/energy_train.txt --seed 1234`

### External Arguments ###
The model expects a number of arguments:
1. dataset_name - Any unique string for the name of the dataset
2. contain_zero_values - Whether the dataset contains zero values (0/1)
3. initial_hyperparameter_values_file - The file for the initial hyperparameter range configurations
4. binary_train_file_train_mode - The tfrecords file for the train dataset in the training mode
5. binary_valid_file_train_mode - The tfrecords file for the validation dataset in the training mode
6. binary_train_file_test_mode - The tfrecords file for the train dataset in the testing mode
7. binary_test_file_test_mode - The tfrecords file for the test dataset in the testing mode
8. txt_test_file - The text file for the test dataset
9. actual_results_file - The text file of the actual results
10. original_data_file - The text file of the original dataset with all the given data points
11. cell_type - The cell type of the RNN (LSTM/GRU/RNN). Default is LSTM
12. input_size - The input size of the moving window. Default is 0 in the case of the non moving window format
13. seasonality_period - The highest seasonality period of the time series (used to calculate MASE)
14. forecast_horizon - The forecast horizon of the dataset
15. optimizer - The type of the optimizer (we only use the cocob optimizer)
16. hyperparameter_tuning - The hyperparameter tuning method (smac in the example above)
17. model_type - The type of the model (we only use the stacking architecture)
18. input_format - Input format (we only use the moving_window format)
19. seed - Integer seed to use as the random seed for hyperparameter tuning

### Execution Flow ###

The entry point for invoking the models is `generic_model_energy_DS_trainer.py`/`generic_model_energy_SE_trainer.py` (for simplicity of explanation, we use only the **DS** training paradigm scenario). The `generic_model_energy_DS_trainer.py` parses the external arguments and identifies the required type of model, optimizer, cell, etc. The actual models are inside the directory `src/LSTM-Models/rnn_architectures/stacking_model/`. First, hyperparameter tuning is carried out using the validation errors of the respective model trainer. Example initial hyperparameter ranges can be found inside the directory `src/LSTM-Models/configs/initial_hyperparameter_values`. The optimal hyperparameter combination found is written to a file in the directory `src/LSTM-Models/results/optimized_configurations/`. This optimal hyperparameter combination is then used by the respective model tester to generate the final forecasts, which are written to the `src/LSTM-Models/results/rnn_forecasts` directory.


## Post Execution Steps ##

### Error Calculation ###
The SMAPE and MASE errors can be calculated per series using *solar_test.txt*, given the RNN forecasts generated in the `src/LSTM-Models/results/rnn_forecasts` directory.
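
For reference, the evaluation scripts under `src/LSTM-Models/error_calculator` compute SMAPE as the per-series mean of `2*|F - A| / (|F| + |A|)`. A minimal NumPy equivalent of that computation (illustrative only; the repository itself performs this step in R):

```
import numpy as np

def smape_per_series(forecasts, actuals):
    # per-series mean of 2*|F - A| / (|F| + |A|), matching the R scripts'
    # rowMeans(2*abs(F - A)/(abs(F) + abs(A)))
    f = np.asarray(forecasts, dtype=float)
    a = np.asarray(actuals, dtype=float)
    return np.mean(2 * np.abs(f - a) / (np.abs(f) + np.abs(a)), axis=1)
```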

When using this repository, please cite:

```
@ARTICLE{Bandara2020-zt,
  title    = "{LSTM-MSNet}: Leveraging Forecasts on Sets of Related Time Series
              With Multiple Seasonal Patterns",
  author   = "Bandara, Kasun and Bergmeir, Christoph and Hewamalage, Hansika",
  journal  = "IEEE Trans Neural Netw Learn Syst",
  month    = apr,
  year     = 2020,
  language = "en"
}
```

--------------------------------------------------------------------------------
/docs/bandara2019LSTM-MsNet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/docs/bandara2019LSTM-MsNet.pdf
--------------------------------------------------------------------------------
/src/Benchmarks/DynamicHaromicRegressionArima.R:
--------------------------------------------------------------------------------
library(forecast)
set.seed(1234)

df_train <- read.csv("solar_train.txt", header = FALSE)
df_test <- read.csv("solar_test.txt", header = FALSE)

forecast_df = matrix(nrow = 300, ncol = 24)
actual_df = matrix(nrow = 300, ncol = 24)

start_time <- Sys.time()

for(index in 1 : nrow(df_train)){
  print("start")
  # time series data
  cust_df <- as.numeric(df_train[index,])
  actual_series <- as.numeric(df_test[index,])

  arima_ts = msts(cust_df, seasonal.periods = c(24,168,8766))

  xreg <- fourier(arima_ts, K = c(10,20,20))
  fit <- auto.arima(arima_ts, xreg = xreg, seasonal = FALSE)

  arima_forecast = forecast(fit, xreg = fourier(arima_ts, K = c(10,20,20), h = 24))
  arima_forecast_mean <- as.numeric(arima_forecast$mean)

  forecast_df[index, ] <- arima_forecast_mean
  actual_df[index, ] <- actual_series
}

end_time <- Sys.time()

print(paste0("Total time", (end_time - start_time)))

write.table(forecast_df, "dynamicregression_arima_forecasts.txt", row.names = FALSE, col.names = FALSE)

sMAPE <- rowMeans(2*abs(forecast_df - actual_df)/(abs(forecast_df) + abs(actual_df)))
print(mean(sMAPE))
print(median(sMAPE))


--------------------------------------------------------------------------------
/src/Benchmarks/DynamicHaromicRegressiontslm.R:
--------------------------------------------------------------------------------
library(forecast)
set.seed(1234)

df_train <- read.csv("solar_train.txt", header = FALSE)
df_test <- read.csv("solar_test.txt", header = FALSE)

forecast_df = matrix(nrow = 300, ncol = 24)
actual_df = matrix(nrow = 300, ncol = 24)

start_time <- Sys.time()

for(i in 1: nrow(df_train)){
  print(i)
  cust_df <- as.numeric(df_train[i,])
  #cust_df <- cust_df + 1
  cust_df_log <- (cust_df)

  actual_series <- as.numeric(df_test[i,])

  arima_ts = msts(cust_df_log, seasonal.periods = c(24,168,8766))
  fit <- tslm(arima_ts ~ fourier(arima_ts, K = c(10,20,20)))

  arima_forecast = forecast(fit, newdata = data.frame(fourier(arima_ts, K = c(10,20,20), h = 24)))

  arima_forecast_mean <- as.numeric(arima_forecast$mean)
  arima_forecast_forecast <- (arima_forecast_mean)
  arima_forecast_forecast[arima_forecast_forecast < 0] <- 0

  forecast_df[i, ] <- arima_forecast_forecast
  actual_df[i, ] <- actual_series
}
end_time <- Sys.time()

print(paste0("Total time", (end_time - start_time)))

write.table(forecast_df, "dynamicHarmonic_forecasts_tslm.txt", row.names = FALSE, col.names = FALSE)

sMAPE <- rowMeans(2*abs(forecast_df - actual_df)/(abs(forecast_df) + abs(actual_df)))
print(mean(sMAPE))
print(median(sMAPE))

--------------------------------------------------------------------------------
/src/Benchmarks/FFORMA.R:
--------------------------------------------------------------------------------
# For more details about the FFORMA method/installation/configuration: https://github.com/robjhyndman/M4metalearning/
set.seed(1234)
library(M4metalearning)
library(tsfeatures)
library(xgboost)
library(rBayesianOptimization)


ts_df <- read.csv("solar_train.txt", header = FALSE, sep = ",")
horizon <- 24
ts_list = list()


for (i in 1 : nrow(ts_df)){
  print(i)
  time_series <- ts(as.numeric(ts_df[i,]))
  ts_list[[i]] <- list(st = paste0("D", i), x = time_series, h = horizon)
}


start_time <- Sys.time()
meta_M4 <- temp_holdout(ts_list)

print("Started Modelling")

meta_M4 <- calc_forecasts(meta_M4, forec_methods(), n.cores=4)
meta_M4 <- calc_errors(meta_M4)
meta_M4 <- THA_features(meta_M4, n.cores=4)

saveRDS(meta_M4, "metasolar.rds")

#meta_M4 <- readRDS("metaenergy.rds")

hyperparameter_search(meta_M4, filename = "solar_hyper.RData", n_iter=10)
load("solar_hyper.RData")
best_hyper <- bay_results[ which.min(bay_results$combi_OWA), ]

# Train the metalearning model with the best hyperparameters found

train_data <- create_feat_classif_problem(meta_M4)

param <- list(max_depth=best_hyper$max_depth,
              eta=best_hyper$eta,
              nthread = 3,
              silent=1,
              objective=error_softmax_obj,
              num_class=ncol(train_data$errors), # the number of forecast methods used
              subsample=best_hyper$subsample,
              colsample_bytree=best_hyper$colsample_bytree)


meta_model <- train_selection_ensemble(train_data$data,
                                       train_data$errors,
                                       param=param)

print("Done model training")

final_M4 <- ts_list

# just calculate the forecasts and features
final_M4 <- calc_forecasts(final_M4, forec_methods())
final_M4 <- THA_features(final_M4)

# get the feature matrix
final_data <- create_feat_classif_problem(final_M4)
# calculate the predictions using our model
preds <- predict_selection_ensemble(meta_model, final_data$data)
# calculate the final mean forecasts
final_M4 <- ensemble_forecast(preds, final_M4)
saveRDS(final_M4, "FinalForecastsSolarYearly.rds")

# the combination predictions are in the field y_hat of each element in the list
# let's check one
end_time <- Sys.time()

print(paste0("Total time", (end_time - start_time)))

forecast_df = matrix(nrow = 300, ncol = 24)


for (idr in 1 : length(final_M4)){
  time_series_forecast <- as.numeric(final_M4[[idr]]$y_hat)
  time_series_forecast[time_series_forecast < 0] <- 0
  forecast_df[idr,] <- time_series_forecast
}
write.table(forecast_df, "forma_solar_yearly_forecasts.txt", row.names = FALSE, col.names = FALSE)



--------------------------------------------------------------------------------
/src/Benchmarks/Prophet.R:
--------------------------------------------------------------------------------
library(forecast)
library(xts)
library(prophet)
set.seed(1234)
library(lubridate)

df_train <- read.csv("solar_train.txt", header = FALSE)
df_test <- read.csv("solar_test.txt", header = FALSE)

forecast_df = matrix(nrow = 300, ncol = 24)
actual_df = matrix(nrow = 300, ncol = 24)

start_time <- Sys.time()

for(i in 1: nrow(df_train)){
  print(i)
  cust_df <- as.numeric(df_train[i,])
  #cust_df <- cust_df + 1
  cust_df_log <- (cust_df)

  actual_series <- as.numeric(df_test[i,])

  ts <- seq(from = as.POSIXct("2010-01-01 00:00"), length.out = length(cust_df_log), by = "hour")

  history <- data.frame(ds = ts, y = cust_df_log)

  future_start <- ts[length(cust_df_log)] + 3600

  future <- data.frame(ds = seq(from = future_start, length.out = 24, by = "hour"))

  m <- prophet(history, daily.seasonality = TRUE, weekly.seasonality = TRUE, yearly.seasonality = TRUE)
  forecast_prophet <- predict(m, future)

  prophet_forecast_mean <- as.numeric(forecast_prophet$yhat)
  prophet_forecast_mean[prophet_forecast_mean < 0] <- 0

  forecast_df[i, ] <- prophet_forecast_mean
  actual_df[i, ] <- actual_series
}

end_time <- Sys.time()

print(paste0("Total time", (end_time - start_time)))

write.table(forecast_df, "prophet_forecasts.txt", row.names = FALSE, col.names = FALSE)

sMAPE <- rowMeans(2*abs(forecast_df - actual_df)/(abs(forecast_df) + abs(actual_df)))
print(mean(sMAPE))
print(median(sMAPE))

--------------------------------------------------------------------------------
/src/Benchmarks/TBATS.R:
--------------------------------------------------------------------------------
library(forecast)
set.seed(1234)

df_train <- read.csv("solar_yearly_train.txt", header = FALSE)
df_test <- read.csv("solar_yearly_test.txt", header = FALSE)

forecast_df = matrix(nrow = 300, ncol = 24)
actual_df = matrix(nrow = 300, ncol = 24)

start_time <- Sys.time()

for(i in 1: nrow(df_train)){
  print(i)
  cust_df <- as.numeric(df_train[i,])
  cust_df_log <- (cust_df)

  actual_series <- as.numeric(df_test[i,])

  tbats_ts = msts(cust_df_log, seasonal.periods = c(24,168,8766))

  tbats_model = tbats(tbats_ts)

  tbats_forecast = forecast(tbats_model, h = 24)

  tbats_forecast_mean <- as.numeric(tbats_forecast$mean)
  tbats_forecast_forecast <- tbats_forecast_mean
  tbats_forecast_forecast[tbats_forecast_forecast < 0] <- 0

  forecast_df[i, ] <- tbats_forecast_forecast
  actual_df[i, ] <- actual_series
}

end_time <- Sys.time()

print(paste0("Total time", (end_time - start_time)))

write.table(forecast_df, "tbats_solaryearly_forecasts.txt", row.names = FALSE, col.names = FALSE)

sMAPE <- rowMeans(2*abs(forecast_df - actual_df)/(abs(forecast_df) + abs(actual_df)))
print(mean(sMAPE))
print(median(sMAPE))

--------------------------------------------------------------------------------
/src/LSTM-Models/configs/global_config.py:
--------------------------------------------------------------------------------
# configs for the model training
class model_training_configs:
    INFO_FREQ = 1

# configs for the model testing
class model_testing_configs:
    RNN_FORECASTS_DIRECTORY = 'results/rnn_forecasts/'
    SNAIVE_FORECASTS_DIRECTORY = 'results/snaive_forecasts/'

# configs for hyperparameter tuning (bayesian optimization/SMAC3)
class hyperparameter_tuning_configs:
    BAYESIAN_INIT_POINTS = 5
    BAYESIAN_NUM_ITER = 100
    SMAC_RUNCOUNT_LIMIT = 20

class training_data_configs:
    SHUFFLE_BUFFER_SIZE = 1
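
# Illustrative usage note (this comment is not part of the original file): the
# model scripts read these settings, e.g. the testers build their output path as
#   rnn_forecasts_file_path = model_testing_configs.RNN_FORECASTS_DIRECTORY + model_identifier + '.txt'
# (see generic_model_energy_DS_test.py).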

--------------------------------------------------------------------------------
/src/LSTM-Models/configs/initial_hyperparameter_values/energy_hourly:
--------------------------------------------------------------------------------
gaussian_noise_stdev - 0.0001, 0.0008

random_normal_initializer_stdev - 0.0001, 0.0008

l2_regularization - 0.0001, 0.0008

cell_dimension - 20, 50

max_epoch_size - 1, 2

max_num_epochs - 3, 15

minibatch_size - 40, 70

num_hidden_layers - 1, 2

rate_of_learning - 0.01, 0.1

rate_of_decay - 0.7, 1

# does not need to be set to a meaningful value unless using TBPTT
tbptt_chunk_length - 15, 25
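
# Note (added for clarity): each entry above defines the initial search range
# for one hyperparameter, written as "<name> - <minimum>, <maximum>".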

--------------------------------------------------------------------------------
/src/LSTM-Models/error_calculator/moving_window/.Rhistory:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Models/error_calculator/moving_window/.Rhistory
--------------------------------------------------------------------------------
/src/LSTM-Models/error_calculator/moving_window/energy_DS_evaluation.R:
--------------------------------------------------------------------------------
#library(TSrepr)

args <- commandArgs(trailingOnly = TRUE)
rnn_forecast_file_path = args[1]
errors_file_name = args[2]
txt_test_file_name = args[3]
actual_results_file_name = args[4]
input_size = as.numeric(args[5])
output_size = as.numeric(args[6])
contain_zero_values = as.numeric(args[7])

root_directory = paste(dirname(getwd()), "time-series-forecasting", sep="/")

# errors file name
errors_directory = paste(root_directory, "results/errors", sep="/")
errors_file_name_mean_median = paste("mean_median", errors_file_name, sep='_')
SMAPE_file_name_all_errors = paste("all_smape_errors", errors_file_name, sep='_')
#MASE_file_name_all_errors = paste("all_mase_errors", errors_file_name, sep='_')
errors_file_full_name_mean_median = paste(errors_directory, errors_file_name_mean_median, sep='/')
SMAPE_file_full_name_all_errors = paste(errors_directory, SMAPE_file_name_all_errors, sep='/')
#MASE_file_full_name_all_errors = paste(errors_directory, MASE_file_name_all_errors, sep='/')

# actual results file name
actual_results_file_full_name = paste(root_directory, actual_results_file_name, sep="/")
actual_results = read.csv(file=actual_results_file_full_name, sep=',', header = FALSE)

# text test data file name
txt_test_file_full_name = paste(root_directory, txt_test_file_name, sep="/")
txt_test_df = read.csv(file=txt_test_file_full_name, sep = " ", header = FALSE)

# rnn_forecasts file name
forecasts_file_full_name = paste(root_directory, rnn_forecast_file_path, sep="/")
forecasts_df = read.csv(forecasts_file_full_name, header = F, sep = ",")

#names(actual_results)[1]="Series"
#actual_results <- actual_results[,-1]

# take the transpose of the dataframe
value <- t(txt_test_df[1])

indexes <- length(value) - match(unique(value), rev(value)) + 1

uniqueindexes <- unique(indexes)

actual_results_df <- actual_results

converted_forecasts_df = NULL
converted_forecasts_matrix = matrix(nrow = nrow(forecasts_df), ncol = output_size)

#mase_vector = NULL

for(k in 1 : nrow(forecasts_df)){
  one_ts_forecasts = as.numeric(forecasts_df[k,])
  finalindex <- uniqueindexes[k]
  one_line_test_data = as.numeric(txt_test_df[finalindex,])
  mean_value = one_line_test_data[input_size + 3]
  level_value = one_line_test_data[input_size + 4]

  seasonal_values = one_line_test_data[input_size + 5:length(one_line_test_data)]

  for (ii in 1:output_size) {
    converted_value = exp(one_ts_forecasts[ii] + level_value + seasonal_values[ii])
    converted_value = converted_value - 1
    converted_value = converted_value*mean_value
    converted_value[converted_value < 0] = 0
    if(contain_zero_values == 1){
      converted_value = converted_value
    }
    converted_forecasts_df[ii] = converted_value
  }
  converted_forecasts_matrix[k,] = converted_forecasts_df
  # mase_vector[k] = mase(unlist(actual_results_df[k,]), converted_forecasts_df, unlist(snaive_forecasts_df[k,]))
}

# calculating the SMAPE
time_series_wise_SMAPE <- 2*abs(converted_forecasts_matrix-actual_results_df)/(abs(converted_forecasts_matrix)+abs(actual_results_df))
SMAPEPerSeries <- rowMeans(time_series_wise_SMAPE)

mean_SMAPE = mean(SMAPEPerSeries)
median_SMAPE = median(SMAPEPerSeries)
std_SMAPE = sd(SMAPEPerSeries)

mean_SMAPE = paste("mean_SMAPE", mean_SMAPE, sep=":")
median_SMAPE = paste("median_SMAPE", median_SMAPE, sep=":")
print(mean_SMAPE)
print(median_SMAPE)
print(std_SMAPE)

# MASE
#mean_MASE = mean(mase_vector)
#median_MASE = median(mase_vector)
#std_MASE = sd(mase_vector)

#mean_MASE = paste("mean_MASE", mean_MASE, sep=":")
#median_MASE = paste("median_MASE", median_MASE, sep=":")
#std_MASE = paste("std_MASE", std_MASE, sep=":")
#print(mean_MASE)
#print(median_MASE)
#print(std_MASE)

# writing the SMAPE results to file
write(c(mean_SMAPE, median_SMAPE, std_SMAPE, "\n"), file=errors_file_full_name_mean_median, append=FALSE)
write.table(converted_forecasts_matrix, SMAPE_file_full_name_all_errors, row.names=FALSE, col.names=FALSE)
#write.table(SMAPEPerSeries, SMAPE_file_full_name_all_errors, row.names=FALSE, col.names=FALSE)

# writing the MASE results to file
#write(c(mean_MASE, median_MASE, std_MASE, "\n"), file=errors_file_full_name_mean_median, append=TRUE)
#write.table(mase_vector, MASE_file_full_name_all_errors, row.names=FALSE, col.names=FALSE)

--------------------------------------------------------------------------------
/src/LSTM-Models/error_calculator/moving_window/energy_SE_evaluation.R:
--------------------------------------------------------------------------------
#library(TSrepr)

args <- commandArgs(trailingOnly = TRUE)
rnn_forecast_file_path = args[1]
errors_file_name = args[2]
txt_test_file_name = args[3]
actual_results_file_name = args[4]
input_size = as.numeric(args[5])
output_size = as.numeric(args[6])
contain_zero_values = as.numeric(args[7])

root_directory = paste(dirname(getwd()), "time-series-forecasting", sep="/")

# errors file name
errors_directory = paste(root_directory, "results/errors", sep="/")
errors_file_name_mean_median = paste("mean_median", errors_file_name, sep='_')
paste("mean_median", errors_file_name, sep='_') 17 | SMAPE_file_name_all_errors = paste("all_smape_errors", errors_file_name, sep='_') 18 | #MASE_file_name_all_errors = paste("all_mase_errors", errors_file_name, sep='_') 19 | errors_file_full_name_mean_median = paste(errors_directory, errors_file_name_mean_median, sep='/') 20 | SMAPE_file_full_name_all_errors = paste(errors_directory, SMAPE_file_name_all_errors, sep='/') 21 | #MASE_file_full_name_all_errors = paste(errors_directory, MASE_file_name_all_errors, sep='/') 22 | 23 | # actual results file name 24 | actual_results_file_full_name = paste(root_directory, actual_results_file_name, sep="/") 25 | actual_results=read.csv(file=actual_results_file_full_name,sep=',',header = FALSE) 26 | 27 | # text test data file name 28 | txt_test_file_full_name = paste(root_directory, txt_test_file_name, sep="/") 29 | txt_test_df=read.csv(file=txt_test_file_full_name,sep = " ",header = FALSE) 30 | 31 | # rnn_forecasts file name 32 | forecasts_file_full_name = paste(root_directory, rnn_forecast_file_path, sep="/") 33 | forecasts_df=read.csv(forecasts_file_full_name, header = F, sep = ",") 34 | 35 | #names(actual_results)[1]="Series" 36 | #actual_results <- actual_results[,-1] 37 | 38 | # take the transpose of the dataframe 39 | value <- t(txt_test_df[1]) 40 | 41 | indexes <- length(value) - match(unique(value), rev(value)) + 1 42 | 43 | uniqueindexes <- unique(indexes) 44 | 45 | actual_results_df <- actual_results 46 | 47 | converted_forecasts_df = NULL 48 | converted_forecasts_matrix = matrix(nrow = nrow(forecasts_df), ncol = output_size) 49 | 50 | #mase_vector = NULL 51 | 52 | for(k in 1 :nrow(forecasts_df)){ 53 | one_ts_forecasts = as.numeric(forecasts_df[k,]) 54 | finalindex <- uniqueindexes[k] 55 | one_line_test_data = as.numeric(txt_test_df[finalindex,]) 56 | mean_value = one_line_test_data[input_size + 3] 57 | level_value = one_line_test_data[input_size + 4] 58 | 59 | for (ii in 1:output_size) { 60 | converted_value = exp(one_ts_forecasts[ii] + level_value) 61 | converted_value = converted_value -1 62 | converted_value = converted_value*mean_value 63 | converted_value[converted_value <0] = 0 64 | if(contain_zero_values == 1){ 65 | converted_value = converted_value -1 66 | } 67 | converted_forecasts_df[ii] = converted_value 68 | } 69 | converted_forecasts_matrix[k,] = converted_forecasts_df 70 | # mase_vector[k] = mase(unlist(actual_results_df[k,]), converted_forecasts_df, unlist(snaive_forecasts_df[k,])) 71 | } 72 | 73 | # calculating the SMAPE 74 | time_series_wise_SMAPE <- 2*abs(converted_forecasts_matrix-actual_results_df)/(abs(converted_forecasts_matrix)+abs(actual_results_df)) 75 | SMAPEPerSeries <- rowMeans(time_series_wise_SMAPE, na.rm=TRUE) 76 | 77 | mean_SMAPE = mean(SMAPEPerSeries) 78 | median_SMAPE = median(SMAPEPerSeries) 79 | std_SMAPE = sd(SMAPEPerSeries) 80 | 81 | mean_SMAPE = paste("mean_SMAPE", mean_SMAPE, sep=":") 82 | median_SMAPE = paste("median_SMAPE", median_SMAPE, sep=":") 83 | print(mean_SMAPE) 84 | print(median_SMAPE) 85 | print(std_SMAPE) 86 | 87 | # MASE 88 | #mean_MASE = mean(mase_vector) 89 | #median_MASE = median(mase_vector) 90 | #std_MASE = sd(mase_vector) 91 | 92 | #mean_MASE = paste("mean_MASE", mean_MASE, sep=":") 93 | #median_MASE = paste("median_MASE", median_MASE, sep=":") 94 | #std_MASE = paste("std_MASE", std_MASE, sep=":") 95 | #print(mean_MASE) 96 | #print(median_MASE) 97 | #print(std_MASE) 98 | 99 | # writing the SMAPE results to file 100 | write(c(mean_SMAPE, median_SMAPE, std_SMAPE, "\n"), 
write.table(converted_forecasts_matrix, SMAPE_file_full_name_all_errors, row.names=FALSE, col.names=FALSE)
#write.table(SMAPEPerSeries, SMAPE_file_full_name_all_errors, row.names=FALSE, col.names=FALSE)

# writing the MASE results to file
#write(c(mean_MASE, median_MASE, std_MASE, "\n"), file=errors_file_full_name_mean_median, append=TRUE)
#write.table(mase_vector, MASE_file_full_name_all_errors, row.names=FALSE, col.names=FALSE)

--------------------------------------------------------------------------------
/src/LSTM-Models/external_packages/cocob_optimizer/__init__.py:
--------------------------------------------------------------------------------
from cocob_optimizer import *
--------------------------------------------------------------------------------
/src/LSTM-Models/external_packages/cocob_optimizer/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Models/external_packages/cocob_optimizer/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/src/LSTM-Models/external_packages/cocob_optimizer/__pycache__/cocob_optimizer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Models/external_packages/cocob_optimizer/__pycache__/cocob_optimizer.cpython-36.pyc
--------------------------------------------------------------------------------
/src/LSTM-Models/external_packages/cocob_optimizer/cocob_optimizer.py:
--------------------------------------------------------------------------------
# Copyright 2017 Francesco Orabona. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


'''
COntinuous COin Betting (COCOB) optimizer
'''

from tensorflow.python.framework import ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.framework import constant_op
from tensorflow.python.training.optimizer import Optimizer
import tensorflow as tf


class COCOB(Optimizer):
    def __init__(self, alpha=100, use_locking=False, name='COCOB'):
        '''
        constructs a new COCOB optimizer
        '''
        super(COCOB, self).__init__(use_locking, name)
        self._alpha = alpha

    def _create_slots(self, var_list):
        for v in var_list:
            with ops.colocate_with(v):
                gradients_sum = constant_op.constant(0,
                                                     shape=v.get_shape(),
                                                     dtype=v.dtype.base_dtype)
                grad_norm_sum = constant_op.constant(0,
                                                     shape=v.get_shape(),
                                                     dtype=v.dtype.base_dtype)
                L = constant_op.constant(1e-8, shape=v.get_shape(), dtype=v.dtype.base_dtype)
                tilde_w = constant_op.constant(0.0, shape=v.get_shape(), dtype=v.dtype.base_dtype)
                reward = constant_op.constant(0.0, shape=v.get_shape(), dtype=v.dtype.base_dtype)

                self._get_or_make_slot(v, L, "L", self._name)
                self._get_or_make_slot(v, grad_norm_sum, "grad_norm_sum", self._name)
                self._get_or_make_slot(v, gradients_sum, "gradients_sum", self._name)
                self._get_or_make_slot(v, tilde_w, "tilde_w", self._name)
                self._get_or_make_slot(v, reward, "reward", self._name)

    def _apply_dense(self, grad, var):
        gradients_sum = self.get_slot(var, "gradients_sum")
        grad_norm_sum = self.get_slot(var, "grad_norm_sum")
        tilde_w = self.get_slot(var, "tilde_w")
        L = self.get_slot(var, "L")
        reward = self.get_slot(var, "reward")

        L_update = tf.maximum(L, tf.abs(grad))
        gradients_sum_update = gradients_sum + grad
        grad_norm_sum_update = grad_norm_sum + tf.abs(grad)
        reward_update = tf.maximum(reward - grad * tilde_w, 0)
        new_w = -gradients_sum_update / (L_update * (tf.maximum(grad_norm_sum_update + L_update, self._alpha * L_update))) * (reward_update + L_update)
        var_update = var - tilde_w + new_w
        tilde_w_update = new_w

        gradients_sum_update_op = state_ops.assign(gradients_sum, gradients_sum_update)
        grad_norm_sum_update_op = state_ops.assign(grad_norm_sum, grad_norm_sum_update)
        var_update_op = state_ops.assign(var, var_update)
        tilde_w_update_op = state_ops.assign(tilde_w, tilde_w_update)
        L_update_op = state_ops.assign(L, L_update)
        reward_update_op = state_ops.assign(reward, reward_update)

        return control_flow_ops.group(*[gradients_sum_update_op,
                                        var_update_op,
                                        grad_norm_sum_update_op,
                                        tilde_w_update_op,
                                        reward_update_op,
                                        L_update_op])

    def _apply_sparse(self, grad, var):
        return self._apply_dense(grad, var)

    def _resource_apply_dense(self, grad, handle):
        return self._apply_dense(grad, handle)
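
# Illustrative usage, mirroring cocob_optimizer_fn in generic_model_energy_DS_test.py
# (this comment is not part of the upstream COCOB source):
#   train_op = COCOB().minimize(loss=total_loss)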

--------------------------------------------------------------------------------
/src/LSTM-Models/generic_model_energy_DS_test.py:
--------------------------------------------------------------------------------
import csv
import tensorflow as tf

# import the different model types

# stacking model
from rnn_architectures.stacking_model.stacking_mean_model_tester import StackingModelTester as StackingModelTester

# seq2seq model with decoder
from rnn_architectures.seq2seq_model.with_decoder.non_moving_window.seq2seq_model_tester import \
    Seq2SeqModelTester as Seq2SeqModelTesterWithNonMovingWindow

# seq2seq model with dense layer
from rnn_architectures.seq2seq_model.with_dense_layer.non_moving_window.seq2seq_model_tester import \
    Seq2SeqModelTesterWithDenseLayer as Seq2SeqModelTesterWithDenseLayerNonMovingWindow
from rnn_architectures.seq2seq_model.with_dense_layer.moving_window.seq2seq_mean_model_tester import \
    Seq2SeqModelTesterWithDenseLayer as Seq2SeqModelTesterWithDenseLayerMovingWindow

# attention model
from rnn_architectures.attention_model.bahdanau_attention.with_stl_decomposition.non_moving_window.attention_model_tester import \
    AttentionModelTester as AttentionModelTesterWithNonMovingWindowWithoutSeasonality
from rnn_architectures.attention_model.bahdanau_attention.without_stl_decomposition.non_moving_window.attention_model_tester import \
    AttentionModelTester as AttentionModelTesterWithNonMovingWindowWithSeasonality

# import the cocob optimizer
from external_packages import cocob_optimizer
from utility_scripts.invoke_r_m4_mean_hourly import invoke_r_script

from configs.global_configs import model_testing_configs

LSTM_USE_PEEPHOLES = True
BIAS = False

learning_rate = 0.0
# learning_rate_decay = 0.0


# function to create the optimizer
def adagrad_optimizer_fn(total_loss):
    # global_step = tf.Variable(0, trainable=False)
    # rate = tf.train.exponential_decay(learning_rate=learning_rate, global_step=global_step, decay_steps=1,
    #                                   decay_rate=learning_rate_decay)
    return tf.train.AdagradOptimizer(learning_rate=learning_rate).minimize(total_loss)


def adam_optimizer_fn(total_loss):
    # global_step = tf.Variable(0, trainable=False)
    # rate = tf.train.exponential_decay(learning_rate=learning_rate, global_step=global_step, decay_steps=1,
    #                                   decay_rate=learning_rate_decay)
    return tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss)


def cocob_optimizer_fn(total_loss):
    return cocob_optimizer.COCOB().minimize(loss=total_loss)


def testing(args, config_dictionary):
    # to make the random number choices reproducible

    global learning_rate
    # global learning_rate_decay

    dataset_name = args.dataset_name
    contain_zero_values = int(args.contain_zero_values)
    binary_train_file_path_test_mode = args.binary_train_file_test_mode
    binary_test_file_path_test_mode = args.binary_test_file_test_mode
    txt_test_file_path = args.txt_test_file
    actual_results_file_path = args.actual_results_file
    if (args.input_size):
        input_size = int(args.input_size)
    else:
        input_size = 0
    output_size = int(args.forecast_horizon)
    optimizer = args.optimizer
    hyperparameter_tuning = args.hyperparameter_tuning
    model_type = args.model_type
    input_format = args.input_format
    seed = int(args.seed)

    if args.without_stl_decomposition:
        without_stl_decomposition = bool(int(args.without_stl_decomposition))
    else:
        without_stl_decomposition = False

    if args.with_truncated_backpropagation:
        with_truncated_backpropagation = bool(int(args.with_truncated_backpropagation))
    else:
        with_truncated_backpropagation = False

    if args.cell_type:
        cell_type = args.cell_type
    else:
        cell_type = "LSTM"
cell_type = "LSTM" 94 | 95 | if not with_truncated_backpropagation: 96 | tbptt_identifier = "without_truncated_backpropagation" 97 | else: 98 | tbptt_identifier = "with_truncated_backpropagation" 99 | 100 | if not without_stl_decomposition: 101 | stl_decomposition_identifier = "with_stl_decomposition" 102 | else: 103 | stl_decomposition_identifier = "without_stl_decomposition" 104 | 105 | model_identifier = dataset_name + "_" + model_type + "_" + cell_type + "cell" + "_" + input_format + "_" + stl_decomposition_identifier + "_" + hyperparameter_tuning + "_" + optimizer + "_" + tbptt_identifier + "_" + str( 106 | seed) 107 | print("Model Testing Started for {}".format(model_identifier)) 108 | print(config_dictionary) 109 | 110 | # select the optimizer 111 | if optimizer == "cocob": 112 | optimizer_fn = cocob_optimizer_fn 113 | elif optimizer == "adagrad": 114 | optimizer_fn = adagrad_optimizer_fn 115 | elif optimizer == "adam": 116 | optimizer_fn = adam_optimizer_fn 117 | 118 | # define the key word arguments for the different model types 119 | model_kwargs = { 120 | 'use_bias': BIAS, 121 | 'use_peepholes': LSTM_USE_PEEPHOLES, 122 | 'input_size': input_size, 123 | 'output_size': output_size, 124 | 'binary_train_file_path': binary_train_file_path_test_mode, 125 | 'binary_test_file_path': binary_test_file_path_test_mode, 126 | 'seed': seed, 127 | 'cell_type': cell_type 128 | } 129 | 130 | # select the model type 131 | if model_type == "stacking": 132 | model_tester = StackingModelTester(**model_kwargs) 133 | elif model_type == "seq2seq": 134 | model_tester = Seq2SeqModelTesterWithNonMovingWindow(**model_kwargs) 135 | elif model_type == "seq2seqwithdenselayer": 136 | if input_format == "non_moving_window": 137 | model_tester = Seq2SeqModelTesterWithDenseLayerNonMovingWindow(**model_kwargs) 138 | elif input_format == "moving_window": 139 | model_tester = Seq2SeqModelTesterWithDenseLayerMovingWindow(**model_kwargs) 140 | elif model_type == "attention": 141 | if without_stl_decomposition: 142 | model_tester = AttentionModelTesterWithNonMovingWindowWithSeasonality(**model_kwargs) 143 | else: 144 | model_tester = AttentionModelTesterWithNonMovingWindowWithoutSeasonality(**model_kwargs) 145 | 146 | if 'rate_of_learning' in config_dictionary: 147 | learning_rate = config_dictionary['rate_of_learning'] 148 | # learning_rate_decay = config_dictionary['rate_of_decay'] 149 | num_hidden_layers = config_dictionary['num_hidden_layers'] 150 | max_num_epochs = config_dictionary['max_num_epochs'] 151 | max_epoch_size = config_dictionary['max_epoch_size'] 152 | cell_dimension = config_dictionary['cell_dimension'] 153 | l2_regularization = config_dictionary['l2_regularization'] 154 | minibatch_size = config_dictionary['minibatch_size'] 155 | gaussian_noise_stdev = config_dictionary['gaussian_noise_stdev'] 156 | random_normal_initializer_stdev = config_dictionary['random_normal_initializer_stdev'] 157 | 158 | list_of_forecasts = model_tester.test_model(num_hidden_layers=int(round(num_hidden_layers)), 159 | cell_dimension=int(round(cell_dimension)), 160 | minibatch_size=int(round(minibatch_size)), 161 | max_epoch_size=int(round(max_epoch_size)), 162 | max_num_epochs=int(round(max_num_epochs)), 163 | l2_regularization=l2_regularization, 164 | gaussian_noise_stdev=gaussian_noise_stdev, 165 | random_normal_initializer_stdev=random_normal_initializer_stdev, 166 | optimizer_fn=optimizer_fn) 167 | 168 | # write the forecasting results to a file 169 | rnn_forecasts_file_path = 

    with open(rnn_forecasts_file_path, "w") as output:
        writer = csv.writer(output, lineterminator='\n')
        writer.writerows(list_of_forecasts)

    # invoke the final evaluation R script
    error_file_name = model_identifier + '.txt'
    #snaive_forecasts_file_path = model_testing_configs.SNAIVE_FORECASTS_DIRECTORY + dataset_name + '.txt'

    if input_format == "moving_window":
        invoke_r_script((rnn_forecasts_file_path, error_file_name, txt_test_file_path,
                         actual_results_file_path, str(input_size), str(output_size), str(contain_zero_values)), True,
                        False)
    else:
        if without_stl_decomposition:
            invoke_r_script((rnn_forecasts_file_path, error_file_name, txt_test_file_path,
                             actual_results_file_path, str(output_size), str(contain_zero_values)), False, True)
        else:
            invoke_r_script((rnn_forecasts_file_path, error_file_name, txt_test_file_path,
                             actual_results_file_path,
                             str(output_size), str(contain_zero_values)), False, False)

--------------------------------------------------------------------------------
/src/LSTM-Models/generic_model_energy_SE_test.py:
--------------------------------------------------------------------------------
import csv
import tensorflow as tf

# import the different model types

# stacking model
from rnn_architectures.stacking_model.stacking_mean_model_feature_tester import StackingModelTester as StackingModelTester

# seq2seq model with decoder
from rnn_architectures.seq2seq_model.with_decoder.non_moving_window.seq2seq_model_tester import \
    Seq2SeqModelTester as Seq2SeqModelTesterWithNonMovingWindow

# seq2seq model with dense layer
from rnn_architectures.seq2seq_model.with_dense_layer.non_moving_window.seq2seq_model_tester import \
    Seq2SeqModelTesterWithDenseLayer as Seq2SeqModelTesterWithDenseLayerNonMovingWindow
from rnn_architectures.seq2seq_model.with_dense_layer.moving_window.seq2seq_mean_model_tester import \
    Seq2SeqModelTesterWithDenseLayer as Seq2SeqModelTesterWithDenseLayerMovingWindow

# attention model
from rnn_architectures.attention_model.bahdanau_attention.with_stl_decomposition.non_moving_window.attention_model_tester import \
    AttentionModelTester as AttentionModelTesterWithNonMovingWindowWithoutSeasonality
from rnn_architectures.attention_model.bahdanau_attention.without_stl_decomposition.non_moving_window.attention_model_tester import \
    AttentionModelTester as AttentionModelTesterWithNonMovingWindowWithSeasonality

# import the cocob optimizer
from external_packages import cocob_optimizer
from utility_scripts.invoke_r_m4_mean_four_hourly import invoke_r_script

from configs.global_configs import model_testing_configs

LSTM_USE_PEEPHOLES = True
BIAS = False

learning_rate = 0.0
# learning_rate_decay = 0.0


# function to create the optimizer
def adagrad_optimizer_fn(total_loss):
    # global_step = tf.Variable(0, trainable=False)
    # rate = tf.train.exponential_decay(learning_rate=learning_rate, global_step=global_step, decay_steps=1,
    #                                   decay_rate=learning_rate_decay)
    return tf.train.AdagradOptimizer(learning_rate=learning_rate).minimize(total_loss)


def adam_optimizer_fn(total_loss):
    # global_step = tf.Variable(0, trainable=False)
    # rate = tf.train.exponential_decay(learning_rate=learning_rate, global_step=global_step, decay_steps=1,
    #                                   decay_rate=learning_rate_decay)
    return tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss)


def cocob_optimizer_fn(total_loss):
    return cocob_optimizer.COCOB().minimize(loss=total_loss)


def testing(args, config_dictionary):
    # to make the random number choices reproducible

    global learning_rate
    # global learning_rate_decay

    dataset_name = args.dataset_name
    contain_zero_values = int(args.contain_zero_values)
    binary_train_file_path_test_mode = args.binary_train_file_test_mode
    binary_test_file_path_test_mode = args.binary_test_file_test_mode
    txt_test_file_path = args.txt_test_file
    actual_results_file_path = args.actual_results_file
    if (args.input_size):
        input_size = int(args.input_size)
    else:
        input_size = 0
    output_size = int(args.forecast_horizon)
    optimizer = args.optimizer
    hyperparameter_tuning = args.hyperparameter_tuning
    model_type = args.model_type
    input_format = args.input_format
    seed = int(args.seed)

    if args.without_stl_decomposition:
        without_stl_decomposition = bool(int(args.without_stl_decomposition))
    else:
        without_stl_decomposition = False

    if args.with_truncated_backpropagation:
        with_truncated_backpropagation = bool(int(args.with_truncated_backpropagation))
    else:
        with_truncated_backpropagation = False

    if args.cell_type:
        cell_type = args.cell_type
    else:
        cell_type = "LSTM"

    if not with_truncated_backpropagation:
        tbptt_identifier = "without_truncated_backpropagation"
    else:
        tbptt_identifier = "with_truncated_backpropagation"

    if not without_stl_decomposition:
        stl_decomposition_identifier = "with_stl_decomposition"
    else:
        stl_decomposition_identifier = "without_stl_decomposition"

    model_identifier = dataset_name + "_" + model_type + "_" + cell_type + "cell" + "_" + input_format + "_" + stl_decomposition_identifier + "_" + hyperparameter_tuning + "_" + optimizer + "_" + tbptt_identifier + "_" + str(
        seed)
    print("Model Testing Started for {}".format(model_identifier))
    print(config_dictionary)

    # select the optimizer
    if optimizer == "cocob":
        optimizer_fn = cocob_optimizer_fn
    elif optimizer == "adagrad":
        optimizer_fn = adagrad_optimizer_fn
    elif optimizer == "adam":
        optimizer_fn = adam_optimizer_fn

    # define the key word arguments for the different model types
    model_kwargs = {
        'use_bias': BIAS,
        'use_peepholes': LSTM_USE_PEEPHOLES,
        'input_size': input_size,
        'output_size': output_size,
        'binary_train_file_path': binary_train_file_path_test_mode,
        'binary_test_file_path': binary_test_file_path_test_mode,
        'seed': seed,
        'cell_type': cell_type
    }

    # select the model type
    if model_type == "stacking":
        model_tester = StackingModelTester(**model_kwargs)
    elif model_type == "seq2seq":
        model_tester = Seq2SeqModelTesterWithNonMovingWindow(**model_kwargs)
    elif model_type == "seq2seqwithdenselayer":
        if input_format == "non_moving_window":
            model_tester = Seq2SeqModelTesterWithDenseLayerNonMovingWindow(**model_kwargs)
        elif input_format == "moving_window":
            model_tester = Seq2SeqModelTesterWithDenseLayerMovingWindow(**model_kwargs)
    elif model_type == "attention":
        if without_stl_decomposition:
            model_tester = AttentionModelTesterWithNonMovingWindowWithSeasonality(**model_kwargs)
        else:
            model_tester = AttentionModelTesterWithNonMovingWindowWithoutSeasonality(**model_kwargs)

    if 'rate_of_learning' in config_dictionary:
        learning_rate = config_dictionary['rate_of_learning']
        # learning_rate_decay = config_dictionary['rate_of_decay']
    num_hidden_layers = config_dictionary['num_hidden_layers']
    max_num_epochs = config_dictionary['max_num_epochs']
    max_epoch_size = config_dictionary['max_epoch_size']
    cell_dimension = config_dictionary['cell_dimension']
    l2_regularization = config_dictionary['l2_regularization']
    minibatch_size = config_dictionary['minibatch_size']
    gaussian_noise_stdev = config_dictionary['gaussian_noise_stdev']
    random_normal_initializer_stdev = config_dictionary['random_normal_initializer_stdev']

    list_of_forecasts = model_tester.test_model(num_hidden_layers=int(round(num_hidden_layers)),
                                                cell_dimension=int(round(cell_dimension)),
                                                minibatch_size=int(round(minibatch_size)),
                                                max_epoch_size=int(round(max_epoch_size)),
                                                max_num_epochs=int(round(max_num_epochs)),
                                                l2_regularization=l2_regularization,
                                                gaussian_noise_stdev=gaussian_noise_stdev,
                                                random_normal_initializer_stdev=random_normal_initializer_stdev,
                                                optimizer_fn=optimizer_fn)

    # write the forecasting results to a file
    rnn_forecasts_file_path = model_testing_configs.RNN_FORECASTS_DIRECTORY + model_identifier + '.txt'

    with open(rnn_forecasts_file_path, "w") as output:
        writer = csv.writer(output, lineterminator='\n')
        writer.writerows(list_of_forecasts)

    # invoke the final evaluation R script
    error_file_name = model_identifier + '.txt'
    #snaive_forecasts_file_path = model_testing_configs.SNAIVE_FORECASTS_DIRECTORY + dataset_name + '.txt'

    if input_format == "moving_window":
        invoke_r_script((rnn_forecasts_file_path, error_file_name, txt_test_file_path,
                         actual_results_file_path, str(input_size), str(output_size), str(contain_zero_values)), True,
                        False)
    else:
        if without_stl_decomposition:
            invoke_r_script((rnn_forecasts_file_path, error_file_name, txt_test_file_path,
                             actual_results_file_path, str(output_size), str(contain_zero_values)), False, True)
        else:
            invoke_r_script((rnn_forecasts_file_path, error_file_name, txt_test_file_path,
                             actual_results_file_path,
                             str(output_size), str(contain_zero_values)), False, False)

--------------------------------------------------------------------------------
/src/LSTM-Models/preprocess_scripts/Baseline/energy_create_tfrecords_baseline.py:
--------------------------------------------------------------------------------
from tfrecords_handler.moving_window.tfrecord_mean_feature_writer import TFRecordWriter

if __name__ == '__main__':
    # hourly data
    tfrecord_writer = TFRecordWriter(
        input_size = 30,
        output_size = 24,
        train_file_path = '../../../datasets/text_data/M4/moving_window/energy_baseline_30i24.txt',
        validate_file_path = '../../../datasets/text_data/M4/moving_window/energy_baseline_30i24v.txt',
        test_file_path = '../../../datasets/text_data/M4/moving_window/energy_baseline_test30i24.txt',
        binary_train_file_path = '../../../datasets/binary_data/M4/moving_window/energy_baseline_30i24.tfrecords',
'../../../datasets/binary_data/M4/moving_window/energy_baseline_30i24.tfrecords', 12 | binary_validation_file_path = '../../../datasets/binary_data/M4/moving_window/energy_baseline_30i24v.tfrecords', 13 | binary_test_file_path = '../../../datasets/binary_data/M4/moving_window/energy_baseline_test30i24.tfrecords' 14 | ) 15 | print("Start reading") 16 | tfrecord_writer.read_text_data() 17 | print("Done reading") 18 | tfrecord_writer.write_train_data_to_tfrecord_file() 19 | print("Done Training") 20 | tfrecord_writer.write_validation_data_to_tfrecord_file() 21 | print("Done Validating") 22 | tfrecord_writer.write_test_data_to_tfrecord_file() 23 | print("Done Testing") 24 | -------------------------------------------------------------------------------- /src/LSTM-Models/preprocess_scripts/DS/MSTL-7-DS/energy_create_tfrecords_mean_hourly_mstl7.py: -------------------------------------------------------------------------------- 1 | from tfrecords_handler.moving_window.tfrecord_mean_writer import TFRecordWriter 2 | 3 | if __name__ == '__main__': 4 | # hourly data 5 | tfrecord_writer = TFRecordWriter( 6 | input_size = 30, 7 | output_size = 24, 8 | train_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl7_30i24.txt', 9 | validate_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl7_30i24v.txt', 10 | test_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl7_hourly_test30i24.txt', 11 | binary_train_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl7_30i24.tfrecords', 12 | binary_validation_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl7_30i24v.tfrecords', 13 | binary_test_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl7_test30i24.tfrecords' 14 | ) 15 | 16 | print("Stage1") 17 | tfrecord_writer.read_text_data() 18 | print("Stage2") 19 | tfrecord_writer.write_train_data_to_tfrecord_file() 20 | print("Stage3") 21 | tfrecord_writer.write_validation_data_to_tfrecord_file() 22 | print("Stage4") 23 | tfrecord_writer.write_test_data_to_tfrecord_file() 24 | -------------------------------------------------------------------------------- /src/LSTM-Models/preprocess_scripts/DS/MSTL-DS/energy_create_tfrecords_mean_hourly_mstl.py: -------------------------------------------------------------------------------- 1 | from tfrecords_handler.moving_window.tfrecord_mean_writer import TFRecordWriter 2 | 3 | if __name__ == '__main__': 4 | # hourly data 5 | tfrecord_writer = TFRecordWriter( 6 | input_size = 30, 7 | output_size = 24, 8 | train_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl_30i24.txt', 9 | validate_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl_30i24v.txt', 10 | test_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl_test30i24.txt', 11 | binary_train_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl_30i24.tfrecords', 12 | binary_validation_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl_30i24v.tfrecords', 13 | binary_test_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl_test30i24.tfrecords' 14 | ) 15 | 16 | print("Stage1") 17 | tfrecord_writer.read_text_data() 18 | print("Stage2") 19 | tfrecord_writer.write_train_data_to_tfrecord_file() 20 | print("Stage3") 21 | tfrecord_writer.write_validation_data_to_tfrecord_file() 22 | print("Stage4") 23 | tfrecord_writer.write_test_data_to_tfrecord_file() 24 | 
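Note that these preprocess scripts report progress but never verify what was written. As a quick sanity check, a written .tfrecords file can be read back and its serialized sequence examples counted (one per time series). The following is a minimal sketch, not part of the original scripts; it assumes the same TensorFlow 1.x API and ZLIB compression used above, and the path shown is illustrative.

import tensorflow as tf

# count the ZLIB-compressed records written by one of the scripts above
options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
path = '../../../datasets/binary_data/M4/moving_window/energy_mstl_30i24.tfrecords'  # illustrative path
num_series = sum(1 for _ in tf.python_io.tf_record_iterator(path, options=options))
print("Serialized sequence examples:", num_series)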
-------------------------------------------------------------------------------- /src/LSTM-Models/preprocess_scripts/DS/Prophet-DS/energy_create_tfrecords_mean_hourly_prophet.py: -------------------------------------------------------------------------------- 1 | from tfrecords_handler.moving_window.tfrecord_mean_writer import TFRecordWriter 2 | 3 | if __name__ == '__main__': 4 | # hourly data 5 | tfrecord_writer = TFRecordWriter( 6 | input_size = 30, 7 | output_size = 24, 8 | train_file_path = '../../../datasets/text_data/M4/moving_window/energy_prophet_30i24.txt', 9 | validate_file_path = '../../../datasets/text_data/M4/moving_window/energy_prophet_30i24v.txt', 10 | test_file_path = '../../../datasets/text_data/M4/moving_window/energy_prophet_test30i24.txt', 11 | binary_train_file_path = '../../../datasets/binary_data/M4/moving_window/energy_prophet_30i24.tfrecords', 12 | binary_validation_file_path = '../../../datasets/binary_data/M4/moving_window/energy_prophet_30i24v.tfrecords', 13 | binary_test_file_path = '../../../datasets/binary_data/M4/moving_window/energy_prophet_test30i24.tfrecords' 14 | ) 15 | 16 | print("Stage1") 17 | tfrecord_writer.read_text_data() 18 | print("Stage2") 19 | tfrecord_writer.write_train_data_to_tfrecord_file() 20 | print("Stage3") 21 | tfrecord_writer.write_validation_data_to_tfrecord_file() 22 | print("Stage4") 23 | tfrecord_writer.write_test_data_to_tfrecord_file() 24 | -------------------------------------------------------------------------------- /src/LSTM-Models/preprocess_scripts/DS/TBATS-DS/energy_create_tfrecords_mean_hourly_tbats.py: -------------------------------------------------------------------------------- 1 | from tfrecords_handler.moving_window.tfrecord_mean_writer import TFRecordWriter 2 | 3 | if __name__ == '__main__': 4 | # hourly data 5 | tfrecord_writer = TFRecordWriter( 6 | input_size = 30, 7 | output_size = 24, 8 | train_file_path = '../../../datasets/text_data/M4/moving_window/energy_tbats_30i24.txt', 9 | validate_file_path = '../../../datasets/text_data/M4/moving_window/energy_tbats_30i24v.txt', 10 | test_file_path = '../../../datasets/text_data/M4/moving_window/energy_tbats_test30i24.txt', 11 | binary_train_file_path = '../../../datasets/binary_data/M4/moving_window/energy_tbats_30i24.tfrecords', 12 | binary_validation_file_path = '../../../datasets/binary_data/M4/moving_window/energy_tbats_30i24v.tfrecords', 13 | binary_test_file_path = '../../../datasets/binary_data/M4/moving_window/energy_tbats_test30i24.tfrecords' 14 | ) 15 | 16 | print("Stage1") 17 | tfrecord_writer.read_text_data() 18 | print("Stage2") 19 | tfrecord_writer.write_train_data_to_tfrecord_file() 20 | print("Stage3") 21 | tfrecord_writer.write_validation_data_to_tfrecord_file() 22 | print("Stage4") 23 | tfrecord_writer.write_test_data_to_tfrecord_file() 24 | -------------------------------------------------------------------------------- /src/LSTM-Models/preprocess_scripts/SE/Fourier-SE-K1/energy_create_tfrecords_fourier.py: -------------------------------------------------------------------------------- 1 | from tfrecords_handler.moving_window.tfrecord_mean_feature_writer import TFRecordWriter 2 | 3 | if __name__ == '__main__': 4 | # hourly data 5 | tfrecord_writer = TFRecordWriter( 6 | input_size = 36, 7 | output_size = 24, 8 | train_file_path = '../../../datasets/text_data/M4/moving_window/energy_fourierk1_36i24.txt', 9 | validate_file_path = '../../../datasets/text_data/M4/moving_window/energy_fourierk1_36i24v.txt', 10 | test_file_path = 
'../../../datasets/text_data/M4/moving_window/energy_fourierk1_test36i24.txt', 11 | binary_train_file_path = '../../../datasets/binary_data/M4/moving_window/energy_fourierk1_36i24.tfrecords', 12 | binary_validation_file_path = '../../../datasets/binary_data/M4/moving_window/energy_fourierk1_36i24v.tfrecords', 13 | binary_test_file_path = '../../../datasets/binary_data/M4/moving_window/energy_fourierk1_test36i24.tfrecords' 14 | ) 15 | print("Start reading") 16 | tfrecord_writer.read_text_data() 17 | print("Done reading") 18 | tfrecord_writer.write_train_data_to_tfrecord_file() 19 | print("Done Training") 20 | tfrecord_writer.write_validation_data_to_tfrecord_file() 21 | print("Done Validating") 22 | tfrecord_writer.write_test_data_to_tfrecord_file() 23 | print("Done Testing") 24 | -------------------------------------------------------------------------------- /src/LSTM-Models/preprocess_scripts/SE/Fourier-SE/energy_create_tfrecords_fourier.py: -------------------------------------------------------------------------------- 1 | from tfrecords_handler.moving_window.tfrecord_mean_feature_writer import TFRecordWriter 2 | 3 | if __name__ == '__main__': 4 | # hourly data 5 | tfrecord_writer = TFRecordWriter( 6 | input_size = 33, 7 | output_size = 24, 8 | train_file_path = '../../../datasets/text_data/M4/moving_window/energy_fourier_33i24.txt', 9 | validate_file_path = '../../../datasets/text_data/M4/moving_window/energy_fourier_33i24v.txt', 10 | test_file_path = '../../../datasets/text_data/M4/moving_window/energy_fourier_test33i24.txt', 11 | binary_train_file_path = '../../../datasets/binary_data/M4/moving_window/energy_fourier_33i24.tfrecords', 12 | binary_validation_file_path = '../../../datasets/binary_data/M4/moving_window/energy_fourier_33i24v.tfrecords', 13 | binary_test_file_path = '../../../datasets/binary_data/M4/moving_window/energy_fourier_test33i24.tfrecords' 14 | ) 15 | print("Start reading") 16 | tfrecord_writer.read_text_data() 17 | print("Done reading") 18 | tfrecord_writer.write_train_data_to_tfrecord_file() 19 | print("Done Training") 20 | tfrecord_writer.write_validation_data_to_tfrecord_file() 21 | print("Done Validating") 22 | tfrecord_writer.write_test_data_to_tfrecord_file() 23 | print("Done Testing") 24 | -------------------------------------------------------------------------------- /src/LSTM-Models/preprocess_scripts/SE/MSTL-7-SE/energy_create_tfrecords_mstl7_feature.py: -------------------------------------------------------------------------------- 1 | from tfrecords_handler.moving_window.tfrecord_mean_feature_writer import TFRecordWriter 2 | 3 | if __name__ == '__main__': 4 | # hourly data 5 | tfrecord_writer = TFRecordWriter( 6 | input_size = 33, 7 | output_size = 24, 8 | train_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl7_33i24.txt', 9 | validate_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl7_33i24v.txt', 10 | test_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl7_test33i24.txt', 11 | binary_train_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl7_33i24.tfrecords', 12 | binary_validation_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl7_33i24v.tfrecords', 13 | binary_test_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl7_test33i24.tfrecords' 14 | ) 15 | print("Start reading") 16 | tfrecord_writer.read_text_data() 17 | print("Done reading") 18 | tfrecord_writer.write_train_data_to_tfrecord_file() 19 | print("Done Training") 20 | 
tfrecord_writer.write_validation_data_to_tfrecord_file() 21 | print("Done Validating") 22 | tfrecord_writer.write_test_data_to_tfrecord_file() 23 | print("Done Testing") 24 | -------------------------------------------------------------------------------- /src/LSTM-Models/preprocess_scripts/SE/MSTL-SE/energy_create_tfrecords_mstl_feature.py: -------------------------------------------------------------------------------- 1 | from tfrecords_handler.moving_window.tfrecord_mean_feature_writer import TFRecordWriter 2 | 3 | if __name__ == '__main__': 4 | # hourly data 5 | tfrecord_writer = TFRecordWriter( 6 | input_size = 33, 7 | output_size = 24, 8 | train_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl_33i24.txt', 9 | validate_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl_33i24v.txt', 10 | test_file_path = '../../../datasets/text_data/M4/moving_window/energy_mstl_test33i24.txt', 11 | binary_train_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl_33i24.tfrecords', 12 | binary_validation_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl_33i24v.tfrecords', 13 | binary_test_file_path = '../../../datasets/binary_data/M4/moving_window/energy_mstl_test33i24.tfrecords' 14 | ) 15 | print("Start reading") 16 | tfrecord_writer.read_text_data() 17 | print("Done reading") 18 | tfrecord_writer.write_train_data_to_tfrecord_file() 19 | print("Done Training") 20 | tfrecord_writer.write_validation_data_to_tfrecord_file() 21 | print("Done Validating") 22 | tfrecord_writer.write_test_data_to_tfrecord_file() 23 | print("Done Testing") 24 | -------------------------------------------------------------------------------- /src/LSTM-Models/preprocess_scripts/SE/Prophet-SE/energy_create_tfrecords_prophet_feature.py: -------------------------------------------------------------------------------- 1 | from tfrecords_handler.moving_window.tfrecord_mean_feature_writer import TFRecordWriter 2 | 3 | if __name__ == '__main__': 4 | # hourly data 5 | tfrecord_writer = TFRecordWriter( 6 | input_size = 33, 7 | output_size = 24, 8 | train_file_path = '../../../datasets/text_data/M4/moving_window/energy_prophet_33i24.txt', 9 | validate_file_path = '../../../datasets/text_data/M4/moving_window/energy_prophet_33i24v.txt', 10 | test_file_path = '../../../datasets/text_data/M4/moving_window/energy_prophet_test33i24.txt', 11 | binary_train_file_path = '../../../datasets/binary_data/M4/moving_window/energy_prophet_33i24.tfrecords', 12 | binary_validation_file_path = '../../../datasets/binary_data/M4/moving_window/energy_prophet_33i24v.tfrecords', 13 | binary_test_file_path = '../../../datasets/binary_data/M4/moving_window/energy_prophet_test_33i24.tfrecords' 14 | ) 15 | print("Start reading") 16 | tfrecord_writer.read_text_data() 17 | print("Done reading") 18 | tfrecord_writer.write_train_data_to_tfrecord_file() 19 | print("Done Training") 20 | tfrecord_writer.write_validation_data_to_tfrecord_file() 21 | print("Done Validating") 22 | tfrecord_writer.write_test_data_to_tfrecord_file() 23 | print("Done Testing") 24 | -------------------------------------------------------------------------------- /src/LSTM-Models/preprocess_scripts/SE/TBATS-SE/energy_create_tfrecords_tbats_feature.py: -------------------------------------------------------------------------------- 1 | from tfrecords_handler.moving_window.tfrecord_mean_feature_writer import TFRecordWriter 2 | 3 | if __name__ == '__main__': 4 | # hourly data 5 | tfrecord_writer = TFRecordWriter( 6 | 
input_size = 33, 7 | output_size = 24, 8 | train_file_path = '../../../datasets/text_data/M4/moving_window/energy_tbats_33i24.txt', 9 | validate_file_path = '../../../datasets/text_data/M4/moving_window/energy_tbats_33i24v.txt', 10 | test_file_path = '../../../datasets/text_data/M4/moving_window/energy_tbats_test33i24.txt', 11 | binary_train_file_path = '../../../datasets/binary_data/M4/moving_window/energy_tbats_33i24.tfrecords', 12 | binary_validation_file_path = '../../../datasets/binary_data/M4/moving_window/energy_tbats_33i24v.tfrecords', 13 | binary_test_file_path = '../../../datasets/binary_data/M4/moving_window/energy_tbats_test33i24.tfrecords' 14 | ) 15 | print("Start reading") 16 | tfrecord_writer.read_text_data() 17 | print("Done reading") 18 | tfrecord_writer.write_train_data_to_tfrecord_file() 19 | print("Done Training") 20 | tfrecord_writer.write_validation_data_to_tfrecord_file() 21 | print("Done Validating") 22 | tfrecord_writer.write_test_data_to_tfrecord_file() 23 | print("Done Testing") 24 | -------------------------------------------------------------------------------- /src/LSTM-Models/results/errors/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Models/results/errors/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Models/results/optimized_configurations/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Models/results/optimized_configurations/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Models/results/processed_rnn_forecasts/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Models/results/processed_rnn_forecasts/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Models/results/rnn_forecasts/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Models/results/rnn_forecasts/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Models/results/validation_errors/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Models/results/validation_errors/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Models/rnn_architectures/stacking_model/energy_stacking_model_DS_tester.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tfrecords_handler.moving_window.tfrecord_mean_reader import TFRecordReader 4 | from configs.global_configs import training_data_configs 5 | 6 | 7 | class StackingModelTester: 8 | 9 | def __init__(self, **kwargs): 10 | self.__use_bias = kwargs["use_bias"] 11 | self.__use_peepholes = kwargs["use_peepholes"] 12 | self.__input_size = 
kwargs["input_size"] 13 | self.__output_size = kwargs["output_size"] 14 | self.__binary_train_file_path = kwargs["binary_train_file_path"] 15 | self.__binary_test_file_path = kwargs["binary_test_file_path"] 16 | self.__seed = kwargs["seed"] 17 | self.__cell_type = kwargs["cell_type"] 18 | 19 | def __l1_loss(self, z, t): 20 | loss = tf.reduce_mean(tf.abs(t - z)) 21 | return loss 22 | 23 | def __l2_loss(selfself, z, t): 24 | loss = tf.losses.mean_squared_error(labels=t, predictions=z) 25 | return loss 26 | 27 | # Training the time series 28 | def test_model(self, **kwargs): 29 | 30 | # extract the parameters from the kwargs 31 | num_hidden_layers = kwargs['num_hidden_layers'] 32 | cell_dimension = kwargs['cell_dimension'] 33 | minibatch_size = kwargs['minibatch_size'] 34 | max_epoch_size = kwargs['max_epoch_size'] 35 | max_num_epochs = kwargs['max_num_epochs'] 36 | l2_regularization = kwargs['l2_regularization'] 37 | gaussian_noise_stdev = kwargs['gaussian_noise_stdev'] 38 | optimizer_fn = kwargs['optimizer_fn'] 39 | random_normal_initializer_stdev = kwargs['random_normal_initializer_stdev'] 40 | 41 | # reset the tensorflow graph 42 | tf.reset_default_graph() 43 | 44 | tf.set_random_seed(self.__seed) 45 | 46 | # declare the input and output placeholders 47 | input = tf.placeholder(dtype=tf.float32, shape=[None, None, self.__input_size]) 48 | noise = tf.random_normal(shape=tf.shape(input), mean=0.0, stddev=gaussian_noise_stdev, dtype=tf.float32) 49 | training_input = input + noise 50 | 51 | testing_input = input 52 | 53 | # output format [batch_size, sequence_length, dimension] 54 | true_output = tf.placeholder(dtype=tf.float32, shape=[None, None, self.__output_size]) 55 | sequence_lengths = tf.placeholder(dtype=tf.int64, shape=[None]) 56 | 57 | weight_initializer = tf.truncated_normal_initializer(stddev=random_normal_initializer_stdev) 58 | 59 | # RNN with the layer of cells 60 | def cell(): 61 | if self.__cell_type == "LSTM": 62 | cell = tf.nn.rnn_cell.LSTMCell(num_units=int(cell_dimension), use_peepholes=self.__use_peepholes, 63 | initializer=weight_initializer) 64 | elif self.__cell_type == "GRU": 65 | cell = tf.nn.rnn_cell.GRUCell(num_units=int(cell_dimension), kernel_initializer=weight_initializer) 66 | elif self.__cell_type == "RNN": 67 | cell = tf.nn.rnn_cell.BasicRNNCell(num_units=int(cell_dimension)) 68 | return cell 69 | 70 | multi_layered_cell = tf.nn.rnn_cell.MultiRNNCell(cells=[cell() for _ in range(int(num_hidden_layers))]) 71 | 72 | with tf.variable_scope('train_scope') as train_scope: 73 | training_rnn_outputs, training_rnn_states = tf.nn.dynamic_rnn(cell=multi_layered_cell, 74 | inputs=training_input, 75 | sequence_length=sequence_lengths, 76 | dtype=tf.float32) 77 | 78 | # connect the dense layer to the RNN 79 | training_prediction_output = tf.layers.dense( 80 | inputs=tf.convert_to_tensor(value=training_rnn_outputs, dtype=tf.float32), 81 | units=self.__output_size, 82 | use_bias=self.__use_bias, kernel_initializer=weight_initializer, name='dense_layer') 83 | 84 | with tf.variable_scope(train_scope, reuse=tf.AUTO_REUSE) as inference_scope: 85 | inference_rnn_outputs, inference_rnn_states = tf.nn.dynamic_rnn(cell=multi_layered_cell, 86 | inputs=testing_input, 87 | sequence_length=sequence_lengths, 88 | dtype=tf.float32) 89 | # connect the dense layer to the RNN 90 | inference_prediction_output = tf.layers.dense( 91 | inputs=tf.convert_to_tensor(value=inference_rnn_outputs, dtype=tf.float32), 92 | units=self.__output_size, 93 | use_bias=self.__use_bias, 
kernel_initializer=weight_initializer, name='dense_layer', reuse=True) 94 | 95 | # error that should be minimized in the training process 96 | error = self.__l1_loss(training_prediction_output, true_output) 97 | 98 | # l2 regularization of the trainable model parameters 99 | l2_loss = 0.0 100 | for var in tf.trainable_variables(): 101 | l2_loss += tf.nn.l2_loss(var) 102 | 103 | l2_loss = tf.multiply(tf.cast(l2_regularization, dtype=tf.float64), tf.cast(l2_loss, dtype=tf.float64)) 104 | 105 | total_loss = tf.cast(error, dtype=tf.float64) + l2_loss 106 | 107 | # create the optimizer via the selected optimizer function (cocob, adagrad or adam) 108 | optimizer = optimizer_fn(total_loss) 109 | 110 | # create the Dataset objects for the training and test data 111 | training_dataset = tf.data.TFRecordDataset(filenames=[self.__binary_train_file_path], compression_type="ZLIB") 112 | test_dataset = tf.data.TFRecordDataset([self.__binary_test_file_path], compression_type="ZLIB") 113 | 114 | # parse the records 115 | tfrecord_reader = TFRecordReader(self.__input_size, self.__output_size) 116 | 117 | # prepare the training data into batches 118 | # randomly shuffle the time series within the dataset 119 | shuffle_seed = tf.placeholder(dtype=tf.int64, shape=[]) 120 | # training_dataset = training_dataset.apply( 121 | # tf.data.experimental.shuffle_and_repeat(buffer_size=training_data_configs.SHUFFLE_BUFFER_SIZE, 122 | # count=int(max_epoch_size), seed=shuffle_seed)) 123 | training_dataset = training_dataset.repeat(count=int(max_epoch_size)) 124 | training_dataset = training_dataset.map(tfrecord_reader.validation_data_parser) 125 | 126 | # create the batches by padding the datasets to make the variable sequence lengths fixed within the individual batches 127 | padded_training_data_batches = training_dataset.padded_batch(batch_size=int(minibatch_size), 128 | padded_shapes=( 129 | [], [tf.Dimension(None), self.__input_size], 130 | [tf.Dimension(None), self.__output_size], 131 | [tf.Dimension(None), self.__output_size + 2])) 132 | 133 | # get an iterator to the batches 134 | training_data_batch_iterator = padded_training_data_batches.make_initializable_iterator() 135 | 136 | # access each batch using the iterator 137 | next_training_data_batch = training_data_batch_iterator.get_next() 138 | 139 | # preparing the test data 140 | test_dataset = test_dataset.map(tfrecord_reader.test_data_parser) 141 | 142 | # create a single batch from all the test time series by padding the datasets to make the variable sequence lengths fixed 143 | padded_test_input_data = test_dataset.padded_batch(batch_size=int(minibatch_size), 144 | padded_shapes=([], [tf.Dimension(None), self.__input_size], 145 | [tf.Dimension(None), self.__output_size + 2])) 146 | 147 | # get an iterator to the test input data batch 148 | test_input_iterator = padded_test_input_data.make_one_shot_iterator() 149 | 150 | # access the test input batch using the iterator 151 | test_input_data_batch = test_input_iterator.get_next() 152 | 153 | # setup variable initialization 154 | init_op = tf.global_variables_initializer() 155 | 156 | with tf.Session() as session: 157 | session.run(init_op) 158 | 159 | for epoch in range(int(max_num_epochs)): 160 | print("Epoch->", epoch) 161 | session.run(training_data_batch_iterator.initializer, feed_dict={shuffle_seed: epoch}) 162 | while True: 163 | try: 164 | training_data_batch_value = session.run(next_training_data_batch, 165 | feed_dict={shuffle_seed: epoch}) 166 | 167 | session.run(optimizer, 168 | feed_dict={input: training_data_batch_value[1], 169 | true_output: 
training_data_batch_value[2], 170 | sequence_lengths: training_data_batch_value[0]}) 171 | 172 | except tf.errors.OutOfRangeError: 173 | break 174 | 175 | # applying the model to the test data 176 | 177 | list_of_forecasts = [] 178 | while True: 179 | try: 180 | 181 | # get the batch of test inputs 182 | test_input_batch_value = session.run(test_input_data_batch) 183 | 184 | # get the output of the network for the test input data batch 185 | test_output = session.run(inference_prediction_output, 186 | feed_dict={input: test_input_batch_value[1], 187 | sequence_lengths: test_input_batch_value[0]}) 188 | 189 | last_output_index = test_input_batch_value[0] - 1 190 | array_first_dimension = np.array(range(0, test_input_batch_value[0].shape[0])) 191 | forecasts = test_output[array_first_dimension, last_output_index] 192 | list_of_forecasts.extend(forecasts.tolist()) 193 | 194 | except tf.errors.OutOfRangeError: 195 | break 196 | 197 | session.close() 198 | return list_of_forecasts 199 | -------------------------------------------------------------------------------- /src/LSTM-Models/rnn_architectures/stacking_model/energy_stacking_model_SE_tester.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tfrecords_handler.moving_window.tfrecord_mean_feature_reader import TFRecordReader 4 | from configs.global_configs import training_data_configs 5 | 6 | 7 | class StackingModelTester: 8 | 9 | def __init__(self, **kwargs): 10 | self.__use_bias = kwargs["use_bias"] 11 | self.__use_peepholes = kwargs["use_peepholes"] 12 | self.__input_size = kwargs["input_size"] 13 | self.__output_size = kwargs["output_size"] 14 | self.__binary_train_file_path = kwargs["binary_train_file_path"] 15 | self.__binary_test_file_path = kwargs["binary_test_file_path"] 16 | self.__seed = kwargs["seed"] 17 | self.__cell_type = kwargs["cell_type"] 18 | 19 | def __l1_loss(self, z, t): 20 | loss = tf.reduce_mean(tf.abs(t - z)) 21 | return loss 22 | 23 | def __l2_loss(self, z, t): 24 | loss = tf.losses.mean_squared_error(labels=t, predictions=z) 25 | return loss 26 | 27 | # Testing the time series 28 | def test_model(self, **kwargs): 29 | 30 | # extract the parameters from the kwargs 31 | num_hidden_layers = kwargs['num_hidden_layers'] 32 | cell_dimension = kwargs['cell_dimension'] 33 | minibatch_size = kwargs['minibatch_size'] 34 | max_epoch_size = kwargs['max_epoch_size'] 35 | max_num_epochs = kwargs['max_num_epochs'] 36 | l2_regularization = kwargs['l2_regularization'] 37 | gaussian_noise_stdev = kwargs['gaussian_noise_stdev'] 38 | optimizer_fn = kwargs['optimizer_fn'] 39 | random_normal_initializer_stdev = kwargs['random_normal_initializer_stdev'] 40 | 41 | # reset the tensorflow graph 42 | tf.reset_default_graph() 43 | 44 | tf.set_random_seed(self.__seed) 45 | 46 | # declare the input and output placeholders 47 | input = tf.placeholder(dtype=tf.float32, shape=[None, None, self.__input_size]) 48 | noise = tf.random_normal(shape=tf.shape(input), mean=0.0, stddev=gaussian_noise_stdev, dtype=tf.float32) 49 | training_input = input + noise 50 | 51 | testing_input = input 52 | 53 | # output format [batch_size, sequence_length, dimension] 54 | true_output = tf.placeholder(dtype=tf.float32, shape=[None, None, self.__output_size]) 55 | sequence_lengths = tf.placeholder(dtype=tf.int64, shape=[None]) 56 | 57 | weight_initializer = tf.truncated_normal_initializer(stddev=random_normal_initializer_stdev) 58 | 59 | # RNN with the layer of cells 60
| def cell(): 61 | if self.__cell_type == "LSTM": 62 | cell = tf.nn.rnn_cell.LSTMCell(num_units=int(cell_dimension), use_peepholes=self.__use_peepholes, 63 | initializer=weight_initializer) 64 | elif self.__cell_type == "GRU": 65 | cell = tf.nn.rnn_cell.GRUCell(num_units=int(cell_dimension), kernel_initializer=weight_initializer) 66 | elif self.__cell_type == "RNN": 67 | cell = tf.nn.rnn_cell.BasicRNNCell(num_units=int(cell_dimension)) 68 | return cell 69 | 70 | multi_layered_cell = tf.nn.rnn_cell.MultiRNNCell(cells=[cell() for _ in range(int(num_hidden_layers))]) 71 | 72 | with tf.variable_scope('train_scope') as train_scope: 73 | training_rnn_outputs, training_rnn_states = tf.nn.dynamic_rnn(cell=multi_layered_cell, 74 | inputs=training_input, 75 | sequence_length=sequence_lengths, 76 | dtype=tf.float32) 77 | 78 | # connect the dense layer to the RNN 79 | training_prediction_output = tf.layers.dense( 80 | inputs=tf.convert_to_tensor(value=training_rnn_outputs, dtype=tf.float32), 81 | units=self.__output_size, 82 | use_bias=self.__use_bias, kernel_initializer=weight_initializer, name='dense_layer') 83 | 84 | with tf.variable_scope(train_scope, reuse=tf.AUTO_REUSE) as inference_scope: 85 | inference_rnn_outputs, inference_rnn_states = tf.nn.dynamic_rnn(cell=multi_layered_cell, 86 | inputs=testing_input, 87 | sequence_length=sequence_lengths, 88 | dtype=tf.float32) 89 | # connect the dense layer to the RNN 90 | inference_prediction_output = tf.layers.dense( 91 | inputs=tf.convert_to_tensor(value=inference_rnn_outputs, dtype=tf.float32), 92 | units=self.__output_size, 93 | use_bias=self.__use_bias, kernel_initializer=weight_initializer, name='dense_layer', reuse=True) 94 | 95 | # error that should be minimized in the training process 96 | error = self.__l1_loss(training_prediction_output, true_output) 97 | 98 | # l2 regularization of the trainable model parameters 99 | l2_loss = 0.0 100 | for var in tf.trainable_variables(): 101 | l2_loss += tf.nn.l2_loss(var) 102 | 103 | l2_loss = tf.multiply(tf.cast(l2_regularization, dtype=tf.float64), tf.cast(l2_loss, dtype=tf.float64)) 104 | 105 | total_loss = tf.cast(error, dtype=tf.float64) + l2_loss 106 | 107 | # create the optimizer via the selected optimizer function (cocob, adagrad or adam) 108 | optimizer = optimizer_fn(total_loss) 109 | 110 | # create the Dataset objects for the training and test data 111 | training_dataset = tf.data.TFRecordDataset(filenames=[self.__binary_train_file_path], compression_type="ZLIB") 112 | test_dataset = tf.data.TFRecordDataset([self.__binary_test_file_path], compression_type="ZLIB") 113 | 114 | # parse the records 115 | tfrecord_reader = TFRecordReader(self.__input_size, self.__output_size) 116 | 117 | # prepare the training data into batches 118 | # randomly shuffle the time series within the dataset 119 | shuffle_seed = tf.placeholder(dtype=tf.int64, shape=[]) 120 | # training_dataset = training_dataset.apply( 121 | # tf.data.experimental.shuffle_and_repeat(buffer_size=training_data_configs.SHUFFLE_BUFFER_SIZE, 122 | # count=int(max_epoch_size), seed=shuffle_seed)) 123 | training_dataset = training_dataset.repeat(count=int(max_epoch_size)) 124 | training_dataset = training_dataset.map(tfrecord_reader.validation_data_parser) 125 | 126 | # create the batches by padding the datasets to make the variable sequence lengths fixed within the individual batches 127 | padded_training_data_batches = training_dataset.padded_batch(batch_size=int(minibatch_size), 128 | padded_shapes=( 129 | [], [tf.Dimension(None), self.__input_size], 130 | [tf.Dimension(None), 
self.__output_size], 131 | [tf.Dimension(None), 2])) 132 | 133 | # get an iterator to the batches 134 | training_data_batch_iterator = padded_training_data_batches.make_initializable_iterator() 135 | 136 | # access each batch using the iterator 137 | next_training_data_batch = training_data_batch_iterator.get_next() 138 | 139 | # preparing the test data 140 | test_dataset = test_dataset.map(tfrecord_reader.test_data_parser) 141 | 142 | # create a single batch from all the test time series by padding the datasets to make the variable sequence lengths fixed 143 | padded_test_input_data = test_dataset.padded_batch(batch_size=int(minibatch_size), 144 | padded_shapes=([], [tf.Dimension(None), self.__input_size], 145 | [tf.Dimension(None), 2])) 146 | 147 | # get an iterator to the test input data batch 148 | test_input_iterator = padded_test_input_data.make_one_shot_iterator() 149 | 150 | # access the test input batch using the iterator 151 | test_input_data_batch = test_input_iterator.get_next() 152 | 153 | # setup variable initialization 154 | init_op = tf.global_variables_initializer() 155 | 156 | with tf.Session() as session: 157 | session.run(init_op) 158 | 159 | for epoch in range(int(max_num_epochs)): 160 | print("Epoch->", epoch) 161 | session.run(training_data_batch_iterator.initializer, feed_dict={shuffle_seed: epoch}) 162 | while True: 163 | try: 164 | training_data_batch_value = session.run(next_training_data_batch, 165 | feed_dict={shuffle_seed: epoch}) 166 | 167 | session.run(optimizer, 168 | feed_dict={input: training_data_batch_value[1], 169 | true_output: training_data_batch_value[2], 170 | sequence_lengths: training_data_batch_value[0]}) 171 | 172 | except tf.errors.OutOfRangeError: 173 | break 174 | 175 | # applying the model to the test data 176 | 177 | list_of_forecasts = [] 178 | while True: 179 | try: 180 | 181 | # get the batch of test inputs 182 | test_input_batch_value = session.run(test_input_data_batch) 183 | 184 | # get the output of the network for the test input data batch 185 | test_output = session.run(inference_prediction_output, 186 | feed_dict={input: test_input_batch_value[1], 187 | sequence_lengths: test_input_batch_value[0]}) 188 | 189 | last_output_index = test_input_batch_value[0] - 1 190 | array_first_dimension = np.array(range(0, test_input_batch_value[0].shape[0])) 191 | forecasts = test_output[array_first_dimension, last_output_index] 192 | list_of_forecasts.extend(forecasts.tolist()) 193 | 194 | except tf.errors.OutOfRangeError: 195 | break 196 | 197 | session.close() 198 | return list_of_forecasts 199 | -------------------------------------------------------------------------------- /src/LSTM-Models/rnn_architectures/stacking_model/energy_stacking_model_SE_trainer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tfrecords_handler.moving_window.tfrecord_mean_feature_reader import TFRecordReader 4 | from configs.global_configs import model_training_configs 5 | from configs.global_configs import training_data_configs 6 | 7 | 8 | class StackingModelTrainer: 9 | 10 | def __init__(self, **kwargs): 11 | self.__use_bias = kwargs["use_bias"] 12 | self.__use_peepholes = kwargs["use_peepholes"] 13 | self.__input_size = kwargs["input_size"] 14 | self.__output_size = kwargs["output_size"] 15 | self.__binary_train_file_path = kwargs["binary_train_file_path"] 16 | self.__binary_validation_file_path = kwargs["binary_validation_file_path"] 17 | self.__contain_zero_values 
= kwargs["contain_zero_values"] 18 | self.__seed = kwargs["seed"] 19 | self.__cell_type = kwargs["cell_type"] 20 | 21 | def __l1_loss(self, z, t): 22 | loss = tf.reduce_mean(tf.abs(t - z)) 23 | return loss 24 | 25 | def __l2_loss(self, z, t): 26 | loss = tf.losses.mean_squared_error(labels=t, predictions=z) 27 | return loss 28 | 29 | # Training the time series 30 | def train_model(self, **kwargs): 31 | 32 | # extract the parameters from the kwargs 33 | num_hidden_layers = kwargs['num_hidden_layers'] 34 | cell_dimension = kwargs['cell_dimension'] 35 | minibatch_size = kwargs['minibatch_size'] 36 | max_epoch_size = kwargs['max_epoch_size'] 37 | max_num_epochs = kwargs['max_num_epochs'] 38 | l2_regularization = kwargs['l2_regularization'] 39 | gaussian_noise_stdev = kwargs['gaussian_noise_stdev'] 40 | optimizer_fn = kwargs['optimizer_fn'] 41 | random_normal_initializer_stdev = kwargs['random_normal_initializer_stdev'] 42 | 43 | print(kwargs) 44 | 45 | tf.reset_default_graph() 46 | 47 | tf.set_random_seed(self.__seed) 48 | 49 | # declare the input and output placeholders 50 | 51 | # input format [batch_size, sequence_length, dimension] 52 | input = tf.placeholder(dtype=tf.float32, shape=[None, None, self.__input_size]) 53 | noise = tf.random_normal(shape=tf.shape(input), mean=0.0, stddev=gaussian_noise_stdev, dtype=tf.float32) 54 | training_input = input + noise 55 | 56 | validation_input = input 57 | 58 | # output format [batch_size, sequence_length, dimension] 59 | true_output = tf.placeholder(dtype=tf.float32, shape=[None, None, self.__output_size]) 60 | sequence_lengths = tf.placeholder(dtype=tf.int64, shape=[None]) 61 | 62 | weight_initializer = tf.truncated_normal_initializer(stddev=random_normal_initializer_stdev) 63 | 64 | # RNN with the layer of cells 65 | def cell(): 66 | if self.__cell_type == "LSTM": 67 | cell = tf.nn.rnn_cell.LSTMCell(num_units=int(cell_dimension), use_peepholes=self.__use_peepholes, 68 | initializer=weight_initializer) 69 | elif self.__cell_type == "GRU": 70 | cell = tf.nn.rnn_cell.GRUCell(num_units=int(cell_dimension), kernel_initializer=weight_initializer) 71 | elif self.__cell_type == "RNN": 72 | cell = tf.nn.rnn_cell.BasicRNNCell(num_units=int(cell_dimension)) 73 | return cell 74 | 75 | multi_layered_cell = tf.nn.rnn_cell.MultiRNNCell(cells=[cell() for _ in range(int(num_hidden_layers))]) 76 | 77 | with tf.variable_scope('train_scope') as train_scope: 78 | training_rnn_outputs, training_rnn_states = tf.nn.dynamic_rnn(cell=multi_layered_cell, 79 | inputs=training_input, 80 | sequence_length=sequence_lengths, 81 | dtype=tf.float32) 82 | 83 | # connect the dense layer to the RNN 84 | training_prediction_output = tf.layers.dense( 85 | inputs=tf.convert_to_tensor(value=training_rnn_outputs, dtype=tf.float32), 86 | units=self.__output_size, 87 | use_bias=self.__use_bias, kernel_initializer=weight_initializer, name='dense_layer') 88 | 89 | with tf.variable_scope(train_scope, reuse=tf.AUTO_REUSE) as inference_scope: 90 | inference_rnn_outputs, inference_rnn_states = tf.nn.dynamic_rnn(cell=multi_layered_cell, 91 | inputs=validation_input, 92 | sequence_length=sequence_lengths, 93 | dtype=tf.float32) 94 | # connect the dense layer to the RNN 95 | inference_prediction_output = tf.layers.dense( 96 | inputs=tf.convert_to_tensor(value=inference_rnn_outputs, dtype=tf.float32), 97 | units=self.__output_size, 98 | use_bias=self.__use_bias, kernel_initializer=weight_initializer, name='dense_layer', reuse=True) 99 | 100 | error = self.__l1_loss(training_prediction_output, 
true_output) 101 | 102 | # l2 regularization of the trainable model parameters 103 | l2_loss = 0.0 104 | for var in tf.trainable_variables(): 105 | l2_loss += tf.nn.l2_loss(var) 106 | 107 | l2_loss = tf.multiply(tf.cast(l2_regularization, dtype=tf.float64), tf.cast(l2_loss, dtype=tf.float64)) 108 | 109 | total_loss = tf.cast(error, dtype=tf.float64) + l2_loss 110 | 111 | # create the optimizer via the selected optimizer function (cocob, adagrad or adam) 112 | optimizer = optimizer_fn(total_loss) 113 | 114 | # create the training and validation datasets from the tfrecord files 115 | training_dataset = tf.data.TFRecordDataset(filenames=[self.__binary_train_file_path], compression_type="ZLIB") 116 | validation_dataset = tf.data.TFRecordDataset(filenames=[self.__binary_validation_file_path], 117 | compression_type="ZLIB") 118 | 119 | # parse the records 120 | tfrecord_reader = TFRecordReader(self.__input_size, self.__output_size) 121 | 122 | # define the expected shapes of data after padding 123 | train_padded_shapes = ([], [tf.Dimension(None), self.__input_size], [tf.Dimension(None), self.__output_size]) 124 | validation_padded_shapes = ( 125 | [], [tf.Dimension(None), self.__input_size], [tf.Dimension(None), self.__output_size], 126 | [tf.Dimension(None), 2]) 127 | 128 | # prepare the training data into batches 129 | # randomly shuffle the time series within the dataset and repeat for the value of the epoch size 130 | shuffle_seed = tf.placeholder(dtype=tf.int64, shape=[]) 131 | #training_dataset = training_dataset.apply( 132 | # tf.data.experimental.shuffle_and_repeat(buffer_size=training_data_configs.SHUFFLE_BUFFER_SIZE, 133 | # count=int(max_epoch_size), seed=shuffle_seed)) 134 | 135 | training_dataset = training_dataset.repeat(count=int(max_epoch_size)) 136 | training_dataset = training_dataset.map(tfrecord_reader.train_data_parser) 137 | 138 | padded_training_data_batches = training_dataset.padded_batch(batch_size=int(minibatch_size), 139 | padded_shapes=train_padded_shapes) 140 | 141 | training_data_batch_iterator = padded_training_data_batches.make_initializable_iterator() 142 | next_training_data_batch = training_data_batch_iterator.get_next() 143 | 144 | # prepare the validation data into batches 145 | validation_dataset = validation_dataset.map(tfrecord_reader.validation_data_parser) 146 | 147 | # create a single batch from all the validation time series by padding the datasets to make the variable sequence lengths fixed 148 | padded_validation_dataset = validation_dataset.padded_batch(batch_size=minibatch_size, 149 | padded_shapes=validation_padded_shapes) 150 | 151 | # get an iterator to the validation data 152 | validation_data_iterator = padded_validation_dataset.make_initializable_iterator() 153 | 154 | # access the validation data using the iterator 155 | next_validation_data_batch = validation_data_iterator.get_next() 156 | 157 | # setup variable initialization 158 | init_op = tf.global_variables_initializer() 159 | 160 | with tf.Session() as session: 161 | session.run(init_op) 162 | 163 | smape_final = 0.0 164 | smape_list = [] 165 | for epoch in range(int(max_num_epochs)): 166 | print("Epoch->", epoch) 167 | 168 | session.run(training_data_batch_iterator.initializer, feed_dict={ 169 | shuffle_seed: epoch}) # initialize the iterator to the beginning of the training dataset 170 | 171 | while True: 172 | try: 173 | training_data_batch_value = session.run(next_training_data_batch, 174 | feed_dict={shuffle_seed: epoch}) 175 | 176 | _, total_loss_value = session.run([optimizer, total_loss], 177 | feed_dict={input: 
training_data_batch_value[1], 178 | true_output: training_data_batch_value[2], 179 | sequence_lengths: training_data_batch_value[0]}) 180 | 181 | except tf.errors.OutOfRangeError: 182 | break 183 | 184 | session.run( 185 | validation_data_iterator.initializer) # initialize the iterator to the beginning of the validation dataset 186 | 187 | while True: 188 | try: 189 | 190 | # get the batch of validation inputs 191 | validation_data_batch_value = session.run(next_validation_data_batch) 192 | 193 | # get the output of the network for the validation input data batch 194 | validation_output = session.run(inference_prediction_output, 195 | feed_dict={input: validation_data_batch_value[1], 196 | sequence_lengths: validation_data_batch_value[0] 197 | }) 198 | # calculate the smape for the validation data using vectorization 199 | 200 | # convert the data to remove the preprocessing 201 | last_indices = validation_data_batch_value[0] - 1 202 | array_first_dimension = np.array(range(0, validation_data_batch_value[0].shape[0])) 203 | 204 | mean_value = validation_data_batch_value[3][array_first_dimension, last_indices, 0] 205 | 206 | level_values = validation_data_batch_value[3][array_first_dimension, last_indices, 1] 207 | 208 | last_validation_outputs = validation_output[array_first_dimension, last_indices] 209 | converted_validation_output = np.exp( 210 | level_values[:, np.newaxis] + last_validation_outputs) 211 | converted_validation_output = converted_validation_output - 1 212 | converted_validation_output = converted_validation_output * mean_value[:, np.newaxis] 213 | converted_validation_output[converted_validation_output < 0] = 0 214 | 215 | actual_values = validation_data_batch_value[2][array_first_dimension, last_indices, :] 216 | converted_actual_values = np.exp( 217 | level_values[:, np.newaxis] + actual_values) 218 | converted_actual_values = converted_actual_values - 1 219 | converted_actual_values = converted_actual_values * mean_value[:, np.newaxis] 220 | converted_actual_values[converted_actual_values < 0] = 0 221 | 222 | if (self.__contain_zero_values): # to compensate for 0 values in data 223 | converted_validation_output = converted_validation_output - 1 224 | converted_actual_values = converted_actual_values - 1 225 | 226 | # calculate the smape 227 | smape = np.mean(np.abs(converted_validation_output - converted_actual_values) / 228 | (np.abs(converted_validation_output) + np.abs(converted_actual_values))) * 2 229 | smape_list.append(smape) 230 | 231 | except tf.errors.OutOfRangeError: 232 | break 233 | 234 | smape_final = np.mean(smape_list) 235 | print("SMAPE value: {}".format(smape_final)) 236 | session.close() 237 | 238 | return smape_final 239 | -------------------------------------------------------------------------------- /src/LSTM-Models/tfrecords_handler/moving_window/energy_tfrecord_DS_reader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class TFRecordReader: 4 | 5 | def __init__(self, input_size, output_size): 6 | self.__input_size = input_size 7 | self.__output_size = output_size 8 | 9 | def train_data_parser(self, serialized_example): 10 | context_parsed, sequence_parsed = tf.parse_single_sequence_example( 11 | serialized_example, 12 | context_features=({ 13 | "sequence_length": tf.FixedLenFeature([], dtype=tf.int64) 14 | }), 15 | sequence_features=({ 16 | "input": tf.FixedLenSequenceFeature([self.__input_size], dtype=tf.float32), 17 | "output": tf.FixedLenSequenceFeature([self.__output_size], 
dtype=tf.float32) 18 | }) 19 | ) 20 | 21 | return context_parsed["sequence_length"], sequence_parsed["input"], sequence_parsed["output"] 22 | 23 | 24 | def validation_data_parser(self, serialized_example): 25 | context_parsed, sequence_parsed = tf.parse_single_sequence_example( 26 | serialized_example, 27 | context_features=({ 28 | "sequence_length": tf.FixedLenFeature([], dtype=tf.int64) 29 | }), 30 | sequence_features=({ 31 | "input": tf.FixedLenSequenceFeature([self.__input_size], dtype=tf.float32), 32 | "output": tf.FixedLenSequenceFeature([self.__output_size], dtype=tf.float32), 33 | "metadata": tf.FixedLenSequenceFeature([self.__output_size + 2], dtype=tf.float32) 34 | }) 35 | ) 36 | 37 | return context_parsed["sequence_length"], sequence_parsed["input"], sequence_parsed["output"], sequence_parsed[ 38 | "metadata"] 39 | 40 | def validation_data_parser_without_stl(self, serialized_example): 41 | context_parsed, sequence_parsed = tf.parse_single_sequence_example( 42 | serialized_example, 43 | context_features=({ 44 | "sequence_length": tf.FixedLenFeature([], dtype=tf.int64) 45 | }), 46 | sequence_features=({ 47 | "input": tf.FixedLenSequenceFeature([self.__input_size], dtype=tf.float32), 48 | "output": tf.FixedLenSequenceFeature([self.__output_size], dtype=tf.float32), 49 | "metadata": tf.FixedLenSequenceFeature([1], dtype=tf.float32) 50 | }) 51 | ) 52 | 53 | return context_parsed["sequence_length"], sequence_parsed["input"], sequence_parsed["output"], sequence_parsed[ 54 | "metadata"] 55 | 56 | def test_data_parser(self, serialized_example): 57 | context_parsed, sequence_parsed = tf.parse_single_sequence_example( 58 | serialized_example, 59 | context_features=({ 60 | "sequence_length": tf.FixedLenFeature([], dtype=tf.int64) 61 | }), 62 | sequence_features=({ 63 | "input": tf.FixedLenSequenceFeature([self.__input_size], dtype=tf.float32), 64 | "metadata": tf.FixedLenSequenceFeature([self.__output_size + 2], dtype=tf.float32) 65 | }) 66 | ) 67 | 68 | return context_parsed["sequence_length"], sequence_parsed["input"], sequence_parsed["metadata"] 69 | 70 | def test_data_parser_without_stl(self, serialized_example): 71 | context_parsed, sequence_parsed = tf.parse_single_sequence_example( 72 | serialized_example, 73 | context_features=({ 74 | "sequence_length": tf.FixedLenFeature([], dtype=tf.int64) 75 | }), 76 | sequence_features=({ 77 | "input": tf.FixedLenSequenceFeature([self.__input_size], dtype=tf.float32), 78 | "metadata": tf.FixedLenSequenceFeature([1], dtype=tf.float32) 79 | }) 80 | ) 81 | 82 | return context_parsed["sequence_length"], sequence_parsed["input"], sequence_parsed["metadata"] -------------------------------------------------------------------------------- /src/LSTM-Models/tfrecords_handler/moving_window/energy_tfrecord_DS_writer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import pandas as pd 4 | 5 | class TFRecordWriter: 6 | 7 | def __init__(self, **kwargs): 8 | self.__input_size = kwargs['input_size'] 9 | self.__output_size = kwargs['output_size'] 10 | self.__train_file_path = kwargs['train_file_path'] 11 | self.__validate_file_path = kwargs['validate_file_path'] 12 | self.__test_file_path = kwargs['test_file_path'] 13 | self.__binary_train_file_path = kwargs['binary_train_file_path'] 14 | self.__binary_validation_file_path = kwargs['binary_validation_file_path'] 15 | self.__binary_test_file_path = kwargs['binary_test_file_path'] 16 | 17 | # read the text data from text 
files 18 | def read_text_data(self): 19 | self.__list_of_training_inputs = [] 20 | self.__list_of_training_outputs = [] 21 | self.__list_of_validation_inputs = [] 22 | self.__list_of_validation_outputs = [] 23 | self.__list_of_validation_metadata = [] 24 | self.__list_of_test_inputs = [] 25 | self.__list_of_test_metadata = [] 26 | 27 | # Reading the training dataset. 28 | train_df = pd.read_csv(self.__train_file_path, nrows=10)  # read a sample of rows to identify the float columns 29 | 30 | float_cols = [c for c in train_df if train_df[c].dtype == "float64"] 31 | float32_cols = {c: np.float32 for c in float_cols} 32 | 33 | train_df = pd.read_csv(self.__train_file_path, sep=" ", header=None, engine='c', dtype=float32_cols) 34 | 35 | train_df = train_df.rename(columns={0: 'series'}) 36 | 37 | # Get the unique time series identifiers in the dataset. 38 | series = pd.unique(train_df['series']) 39 | 40 | # Construct input and output training tuples for each time series. 41 | for ser in series: 42 | one_series_df = train_df[train_df['series'] == ser] 43 | inputs_df = one_series_df.iloc[:, range(1, (self.__input_size + 1))] 44 | outputs_df = one_series_df.iloc[:, range((self.__input_size + 2), (self.__input_size + self.__output_size + 2))] 45 | self.__list_of_training_inputs.append(np.ascontiguousarray(inputs_df, dtype=np.float32)) 46 | self.__list_of_training_outputs.append(np.ascontiguousarray(outputs_df, dtype=np.float32)) 47 | 48 | # Reading the validation dataset. 49 | val_df = pd.read_csv(self.__validate_file_path, nrows=10) 50 | 51 | float_cols = [c for c in val_df if val_df[c].dtype == "float64"] 52 | float32_cols = {c: np.float32 for c in float_cols} 53 | 54 | val_df = pd.read_csv(self.__validate_file_path, sep=" ", header=None, engine='c', dtype=float32_cols) 55 | 56 | val_df = val_df.rename(columns={0: 'series'}) 57 | series = pd.unique(val_df['series']) 58 | 59 | for ser in series: 60 | one_series_df = val_df[val_df['series'] == ser] 61 | inputs_df_test = one_series_df.iloc[:, range(1, (self.__input_size + 1))] 62 | metadata_df = one_series_df.iloc[:, range((self.__input_size + self.__output_size + 3), one_series_df.shape[1])] 63 | outputs_df_test = one_series_df.iloc[:, range((self.__input_size + 2), (self.__input_size + self.__output_size + 2))] 64 | self.__list_of_validation_inputs.append(np.ascontiguousarray(inputs_df_test, dtype=np.float32)) 65 | self.__list_of_validation_outputs.append(np.ascontiguousarray(outputs_df_test, dtype=np.float32)) 66 | self.__list_of_validation_metadata.append(np.ascontiguousarray(metadata_df, dtype=np.float32)) 67 | 68 | # Reading the test file. 
69 | test_df = pd.read_csv(self.__test_file_path, nrows=10) 70 | 71 | float_cols = [c for c in test_df if test_df[c].dtype == "float64"] 72 | float32_cols = {c: np.float32 for c in float_cols} 73 | 74 | test_df = pd.read_csv(self.__test_file_path, sep=" ", header=None, engine='c', dtype=float32_cols) 75 | 76 | test_df = test_df.rename(columns={0: 'series'}) 77 | 78 | series1 = pd.unique(test_df['series']) 79 | 80 | for ser in series1: 81 | test_series_df = test_df[test_df['series'] == ser] 82 | test_inputs_df = test_series_df.iloc[:, range(1, (self.__input_size + 1))] 83 | metadata_df = test_series_df.iloc[:, range((self.__input_size + 2), test_series_df.shape[1])] 84 | self.__list_of_test_inputs.append(np.ascontiguousarray(test_inputs_df, dtype=np.float32)) 85 | self.__list_of_test_metadata.append(np.ascontiguousarray(metadata_df, dtype=np.float32)) 86 | 87 | # write the train text data into a tfrecord file 88 | def write_train_data_to_tfrecord_file(self): 89 | 90 | writer = tf.python_io.TFRecordWriter(self.__binary_train_file_path, tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)) 91 | 92 | # write the training data file in tfrecords format 93 | for input, output in zip(self.__list_of_training_inputs, self.__list_of_training_outputs): 94 | 95 | sequence_length = input.shape[0] 96 | sequence_example = tf.train.SequenceExample( 97 | context=tf.train.Features(feature={ 98 | "sequence_length" : tf.train.Feature(int64_list=tf.train.Int64List(value=[sequence_length])) 99 | }), 100 | feature_lists = tf.train.FeatureLists(feature_list={ 101 | "input" : tf.train.FeatureList(feature=[ 102 | tf.train.Feature(float_list=tf.train.FloatList(value=input_sequence)) for input_sequence in input 103 | ]), 104 | "output" : tf.train.FeatureList(feature=[ 105 | tf.train.Feature(float_list=tf.train.FloatList(value=output_sequence)) for output_sequence in output 106 | ]) 107 | }) 108 | ) 109 | writer.write(sequence_example.SerializeToString()) 110 | writer.close() 111 | 112 | # write the validation text data into a tfrecord file 113 | def write_validation_data_to_tfrecord_file(self): 114 | 115 | writer = tf.python_io.TFRecordWriter(self.__binary_validation_file_path, tf.python_io.TFRecordOptions( 116 | tf.python_io.TFRecordCompressionType.ZLIB)) 117 | 118 | # write the validation data file in tfrecords format 119 | for input, output, metadata in zip(self.__list_of_validation_inputs, self.__list_of_validation_outputs, self.__list_of_validation_metadata): 120 | sequence_length = input.shape[0] 121 | sequence_example = tf.train.SequenceExample( 122 | context=tf.train.Features(feature={ 123 | "sequence_length": tf.train.Feature(int64_list=tf.train.Int64List(value=[sequence_length])) 124 | }), 125 | feature_lists=tf.train.FeatureLists(feature_list={ 126 | "input": tf.train.FeatureList(feature=[ 127 | tf.train.Feature(float_list=tf.train.FloatList(value=input_sequence)) for input_sequence in input 128 | ]), 129 | "output": tf.train.FeatureList(feature=[ 130 | tf.train.Feature(float_list=tf.train.FloatList(value=output_sequence)) for output_sequence 131 | in output 132 | ]), 133 | "metadata": tf.train.FeatureList(feature=[ 134 | tf.train.Feature(float_list=tf.train.FloatList(value=metadata_sequence)) for metadata_sequence in metadata 135 | ]) 136 | }) 137 | ) 138 | writer.write(sequence_example.SerializeToString()) 139 | writer.close() 140 | 141 | # write the test text data into tfrecord file 142 | def write_test_data_to_tfrecord_file(self): 143 | 144 | writer = 
tf.python_io.TFRecordWriter(self.__binary_test_file_path, tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)) 145 | 146 | # write the test data file in tfrecords format 147 | for input, metadata in zip(self.__list_of_test_inputs, self.__list_of_test_metadata): 148 | 149 | sequence_length = input.shape[0] 150 | sequence_example = tf.train.SequenceExample( 151 | context=tf.train.Features(feature={ 152 | "sequence_length" : tf.train.Feature(int64_list=tf.train.Int64List(value=[sequence_length])) 153 | }), 154 | feature_lists = tf.train.FeatureLists(feature_list={ 155 | "input" : tf.train.FeatureList(feature=[ 156 | tf.train.Feature(float_list=tf.train.FloatList(value=input_sequence)) for input_sequence in input 157 | ]), 158 | "metadata" : tf.train.FeatureList(feature=[ 159 | tf.train.Feature(float_list=tf.train.FloatList(value=metadata_sequence)) for metadata_sequence in metadata 160 | ]) 161 | }) 162 | ) 163 | writer.write(sequence_example.SerializeToString()) 164 | writer.close() -------------------------------------------------------------------------------- /src/LSTM-Models/tfrecords_handler/moving_window/energy_tfrecord_SE_reader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class TFRecordReader: 4 | 5 | def __init__(self, input_size, output_size): 6 | self.__input_size = input_size 7 | self.__output_size = output_size 8 | 9 | def train_data_parser(self, serialized_example): 10 | context_parsed, sequence_parsed = tf.parse_single_sequence_example( 11 | serialized_example, 12 | context_features=({ 13 | "sequence_length": tf.FixedLenFeature([], dtype=tf.int64) 14 | }), 15 | sequence_features=({ 16 | "input": tf.FixedLenSequenceFeature([self.__input_size], dtype=tf.float32), 17 | "output": tf.FixedLenSequenceFeature([self.__output_size], dtype=tf.float32) 18 | }) 19 | ) 20 | 21 | return context_parsed["sequence_length"], sequence_parsed["input"], sequence_parsed["output"] 22 | 23 | 24 | def validation_data_parser(self, serialized_example): 25 | context_parsed, sequence_parsed = tf.parse_single_sequence_example( 26 | serialized_example, 27 | context_features=({ 28 | "sequence_length": tf.FixedLenFeature([], dtype=tf.int64) 29 | }), 30 | sequence_features=({ 31 | "input": tf.FixedLenSequenceFeature([self.__input_size], dtype=tf.float32), 32 | "output": tf.FixedLenSequenceFeature([self.__output_size], dtype=tf.float32), 33 | "metadata": tf.FixedLenSequenceFeature([2], dtype=tf.float32) 34 | }) 35 | ) 36 | 37 | return context_parsed["sequence_length"], sequence_parsed["input"], sequence_parsed["output"], sequence_parsed[ 38 | "metadata"] 39 | 40 | def validation_data_parser_without_stl(self, serialized_example): 41 | context_parsed, sequence_parsed = tf.parse_single_sequence_example( 42 | serialized_example, 43 | context_features=({ 44 | "sequence_length": tf.FixedLenFeature([], dtype=tf.int64) 45 | }), 46 | sequence_features=({ 47 | "input": tf.FixedLenSequenceFeature([self.__input_size], dtype=tf.float32), 48 | "output": tf.FixedLenSequenceFeature([self.__output_size], dtype=tf.float32), 49 | "metadata": tf.FixedLenSequenceFeature([1], dtype=tf.float32) 50 | }) 51 | ) 52 | 53 | return context_parsed["sequence_length"], sequence_parsed["input"], sequence_parsed["output"], sequence_parsed[ 54 | "metadata"] 55 | 56 | def test_data_parser(self, serialized_example): 57 | context_parsed, sequence_parsed = tf.parse_single_sequence_example( 58 | serialized_example, 59 | context_features=({ 60
"sequence_length": tf.FixedLenFeature([], dtype=tf.int64) 61 | }), 62 | sequence_features=({ 63 | "input": tf.FixedLenSequenceFeature([self.__input_size], dtype=tf.float32), 64 | "metadata": tf.FixedLenSequenceFeature([2], dtype=tf.float32) 65 | }) 66 | ) 67 | 68 | return context_parsed["sequence_length"], sequence_parsed["input"], sequence_parsed["metadata"] 69 | 70 | def test_data_parser_without_stl(self, serialized_example): 71 | context_parsed, sequence_parsed = tf.parse_single_sequence_example( 72 | serialized_example, 73 | context_features=({ 74 | "sequence_length": tf.FixedLenFeature([], dtype=tf.int64) 75 | }), 76 | sequence_features=({ 77 | "input": tf.FixedLenSequenceFeature([self.__input_size], dtype=tf.float32), 78 | "metadata": tf.FixedLenSequenceFeature([1], dtype=tf.float32) 79 | }) 80 | ) 81 | 82 | return context_parsed["sequence_length"], sequence_parsed["input"], sequence_parsed["metadata"] -------------------------------------------------------------------------------- /src/LSTM-Models/tfrecords_handler/moving_window/energy_tfrecord_SE_writer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import pandas as pd 4 | 5 | class TFRecordWriter: 6 | 7 | def __init__(self, **kwargs): 8 | self.__input_size = kwargs['input_size'] 9 | self.__output_size = kwargs['output_size'] 10 | self.__train_file_path = kwargs['train_file_path'] 11 | self.__validate_file_path = kwargs['validate_file_path'] 12 | self.__test_file_path = kwargs['test_file_path'] 13 | self.__binary_train_file_path = kwargs['binary_train_file_path'] 14 | self.__binary_validation_file_path = kwargs['binary_validation_file_path'] 15 | self.__binary_test_file_path = kwargs['binary_test_file_path'] 16 | 17 | # read the text data from text files 18 | def read_text_data(self): 19 | self.__list_of_training_inputs = [] 20 | self.__list_of_training_outputs = [] 21 | self.__list_of_validation_inputs = [] 22 | self.__list_of_validation_outputs =[] 23 | self.__list_of_validation_metadata = [] 24 | self.__list_of_test_inputs = [] 25 | self.__list_of_test_metadata = [] 26 | 27 | # Reading the training dataset. 28 | train_df = pd.read_csv(self.__train_file_path, nrows=10) 29 | 30 | float_cols = [c for c in train_df if train_df[c].dtype == "float64"] 31 | float32_cols = {c: np.float32 for c in float_cols} 32 | 33 | train_df = pd.read_csv(self.__train_file_path, sep=" ", header=None, engine='c', dtype=float32_cols) 34 | 35 | train_df = train_df.rename(columns={0: 'series'}) 36 | 37 | # Returns unique number of time series in the dataset. 38 | series = pd.unique(train_df['series']) 39 | 40 | # Construct input and output training tuples for each time series. 41 | for ser in series: 42 | one_series_df = train_df[train_df['series'] == ser] 43 | inputs_df = one_series_df.iloc[:, range(1, (self.__input_size + 1))] 44 | outputs_df = one_series_df.iloc[:, range((self.__input_size + 2), (self.__input_size + self.__output_size + 2))] 45 | self.__list_of_training_inputs.append(np.ascontiguousarray(inputs_df, dtype=np.float32)) 46 | self.__list_of_training_outputs.append(np.ascontiguousarray(outputs_df, dtype=np.float32)) 47 | 48 | # Reading the validation dataset. 
49 | val_df = pd.read_csv(self.__validate_file_path, nrows=10) 50 | 51 | float_cols = [c for c in val_df if val_df[c].dtype == "float64"] 52 | float32_cols = {c: np.float32 for c in float_cols} 53 | 54 | val_df = pd.read_csv(self.__validate_file_path, sep=" ", header=None, engine='c', dtype=float32_cols) 55 | 56 | val_df = val_df.rename(columns={0: 'series'}) 57 | series = pd.unique(val_df['series']) 58 | 59 | for ser in series: 60 | one_series_df = val_df[val_df['series'] == ser] 61 | inputs_df_test = one_series_df.iloc[:, range(1, (self.__input_size + 1))] 62 | metadata_df = one_series_df.iloc[:, range((self.__input_size + self.__output_size + 3), one_series_df.shape[1])] 63 | outputs_df_test = one_series_df.iloc[:, range((self.__input_size + 2), (self.__input_size + self.__output_size + 2))] 64 | self.__list_of_validation_inputs.append(np.ascontiguousarray(inputs_df_test, dtype=np.float32)) 65 | self.__list_of_validation_outputs.append(np.ascontiguousarray(outputs_df_test, dtype=np.float32)) 66 | self.__list_of_validation_metadata.append(np.ascontiguousarray(metadata_df, dtype=np.float32)) 67 | 68 | # Reading the test file. 69 | test_df = pd.read_csv(self.__test_file_path, nrows=10) 70 | 71 | float_cols = [c for c in test_df if test_df[c].dtype == "float64"] 72 | float32_cols = {c: np.float32 for c in float_cols} 73 | 74 | test_df = pd.read_csv(self.__test_file_path, sep=" ", header=None, engine='c', dtype=float32_cols) 75 | 76 | test_df = test_df.rename(columns={0: 'series'}) 77 | 78 | series1 = pd.unique(test_df['series']) 79 | 80 | for ser in series1: 81 | test_series_df = test_df[test_df['series'] == ser] 82 | test_inputs_df = test_series_df.iloc[:, range(1, (self.__input_size + 1))] 83 | metadata_df = test_series_df.iloc[:, range((self.__input_size + 2), test_series_df.shape[1])] 84 | self.__list_of_test_inputs.append(np.ascontiguousarray(test_inputs_df, dtype=np.float32)) 85 | self.__list_of_test_metadata.append(np.ascontiguousarray(metadata_df, dtype=np.float32)) 86 | 87 | # write the training text data into a tfrecord file 88 | def write_train_data_to_tfrecord_file(self): 89 | 90 | writer = tf.python_io.TFRecordWriter(self.__binary_train_file_path, tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)) 91 | 92 | # write the training data in tfrecords format 93 | for input, output in zip(self.__list_of_training_inputs, self.__list_of_training_outputs): 94 | 95 | sequence_length = input.shape[0] 96 | sequence_example = tf.train.SequenceExample( 97 | context=tf.train.Features(feature={ 98 | "sequence_length" : tf.train.Feature(int64_list=tf.train.Int64List(value=[sequence_length])) 99 | }), 100 | feature_lists = tf.train.FeatureLists(feature_list={ 101 | "input" : tf.train.FeatureList(feature=[ 102 | tf.train.Feature(float_list=tf.train.FloatList(value=input_sequence)) for input_sequence in input 103 | ]), 104 | "output" : tf.train.FeatureList(feature=[ 105 | tf.train.Feature(float_list=tf.train.FloatList(value=output_sequence)) for output_sequence in output 106 | ]) 107 | }) 108 | ) 109 | writer.write(sequence_example.SerializeToString()) 110 | writer.close() 111 | 112 | # write the validation text data into a tfrecord file 113 | def write_validation_data_to_tfrecord_file(self): 114 | 115 | writer = tf.python_io.TFRecordWriter(self.__binary_validation_file_path, tf.python_io.TFRecordOptions( 116 | tf.python_io.TFRecordCompressionType.ZLIB)) 117 | 118 | # write the validation data in tfrecords format 119 | for input, output, metadata in 
zip(self.__list_of_validation_inputs, self.__list_of_validation_outputs, self.__list_of_validation_metadata): 120 | sequence_length = input.shape[0] 121 | sequence_example = tf.train.SequenceExample( 122 | context=tf.train.Features(feature={ 123 | "sequence_length": tf.train.Feature(int64_list=tf.train.Int64List(value=[sequence_length])) 124 | }), 125 | feature_lists=tf.train.FeatureLists(feature_list={ 126 | "input": tf.train.FeatureList(feature=[ 127 | tf.train.Feature(float_list=tf.train.FloatList(value=input_sequence)) for input_sequence in input 128 | ]), 129 | "output": tf.train.FeatureList(feature=[ 130 | tf.train.Feature(float_list=tf.train.FloatList(value=output_sequence)) for output_sequence 131 | in output 132 | ]), 133 | "metadata": tf.train.FeatureList(feature=[ 134 | tf.train.Feature(float_list=tf.train.FloatList(value=metadata_sequence)) for metadata_sequence in metadata 135 | ]) 136 | }) 137 | ) 138 | writer.write(sequence_example.SerializeToString()) 139 | writer.close() 140 | 141 | # write the test text data into a tfrecord file 142 | def write_test_data_to_tfrecord_file(self): 143 | 144 | writer = tf.python_io.TFRecordWriter(self.__binary_test_file_path, tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)) 145 | 146 | # write the test data in tfrecords format 147 | for input, metadata in zip(self.__list_of_test_inputs, self.__list_of_test_metadata): 148 | 149 | sequence_length = input.shape[0] 150 | sequence_example = tf.train.SequenceExample( 151 | context=tf.train.Features(feature={ 152 | "sequence_length" : tf.train.Feature(int64_list=tf.train.Int64List(value=[sequence_length])) 153 | }), 154 | feature_lists = tf.train.FeatureLists(feature_list={ 155 | "input" : tf.train.FeatureList(feature=[ 156 | tf.train.Feature(float_list=tf.train.FloatList(value=input_sequence)) for input_sequence in input 157 | ]), 158 | "metadata" : tf.train.FeatureList(feature=[ 159 | tf.train.Feature(float_list=tf.train.FloatList(value=metadata_sequence)) for metadata_sequence in metadata 160 | ]) 161 | }) 162 | ) 163 | writer.write(sequence_example.SerializeToString()) 164 | writer.close() -------------------------------------------------------------------------------- /src/LSTM-Models/utility_scripts/hyperparameter_scripts/hyperparameter_config_reader.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def read_optimal_hyperparameter_values(file_name): 4 | # define dictionary to store the hyperparameter values 5 | hyperparameter_values_dic = {} 6 | 7 | with open(file_name) as configs_file: 8 | configs = configs_file.readlines() 9 | for config in configs: 10 | if not config.startswith('#') and config.strip(): 11 | values = [value.strip() for value in (re.split(">>>", config))] 12 | hyperparameter_values_dic[values[0]] = float(values[1]) 13 | 14 | configs_file.close() 15 | 16 | return hyperparameter_values_dic 17 | 18 | def read_initial_hyperparameter_values(initial_hyperparameter_values_file): 19 | # define dictionary to store the hyperparameter values 20 | hyperparameter_values_dic = {} 21 | 22 | with open(initial_hyperparameter_values_file) as configs_file: 23 | configs = configs_file.readlines() 24 | for config in configs: 25 | if not config.startswith('#') and config.strip(): 26 | values = [value.strip() for value in (re.split("-|,", config))] 27 | hyperparameter_values_dic[values[0]] = [float(values[1]), float(values[2])] 28 | 29 | configs_file.close() 30 | 31 | return hyperparameter_values_dic 
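The two readers above imply two small text formats: optimal-value files hold one "name >>> value" pair per line (the format that persist_optimized_config_results.py, further below, writes out), and initial-range files hold one "name - min,max" range per line. A minimal usage sketch, assuming the two functions above are in scope; the file names and values are illustrative only, not files shipped in the repository:

# hypothetical file contents, for illustration:
#   optimal file line:        rate_of_learning >>> 0.0066
#   initial-range file line:  cell_dimension - 20, 50
optimal_values = read_optimal_hyperparameter_values("energy_LSTM_optimal.txt")
initial_ranges = read_initial_hyperparameter_values("energy_hourly_initial.txt")
print(optimal_values["rate_of_learning"])   # -> 0.0066
print(initial_ranges["cell_dimension"])     # -> [20.0, 50.0]

Note that re.split("-|,") splits on every hyphen, so the range format assumes hyphen-free parameter names and non-negative bounds.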
-------------------------------------------------------------------------------- /src/LSTM-Models/utility_scripts/hyperparameter_scripts/hyperparameter_summary_generator.py: -------------------------------------------------------------------------------- 1 | ## This file concatenates the optimal hyperparameter configs across all the models for the same dataset and writes them to a csv file 2 | 3 | import glob 4 | import argparse 5 | import pandas as pd 6 | import re 7 | from utility_scripts.hyperparameter_scripts.hyperparameter_config_reader import read_optimal_hyperparameter_values 8 | 9 | # get the dataset name as an external argument 10 | argument_parser = argparse.ArgumentParser("Create hyperparameter summaries") 11 | argument_parser.add_argument('--dataset_name', required=True, help='Unique string for the name of the dataset') 12 | 13 | # parse the user arguments 14 | args = argument_parser.parse_args() 15 | dataset_name = args.dataset_name 16 | 17 | input_path = '../results/optimized_configurations/' 18 | 19 | output_path = '../results/optimized_configurations/aggregate_hyperparameter_configs/' 20 | 21 | output_file = output_path + dataset_name + ".csv" 22 | 23 | # get the list of all the config files whose names start with the dataset name 24 | hyperparameter_files = [filename for filename in glob.iglob(input_path + dataset_name + "_*")] 25 | 26 | hyperparameters_df = pd.DataFrame( 27 | columns=["Model_Name", "cell_dimension", "gaussian_noise_stdev", "l2_regularization", "max_epoch_size", 28 | "max_num_epochs", "minibatch_size", "num_hidden_layers", "random_normal_initializer_stdev", 29 | "rate_of_learning"]) 30 | 31 | # concatenate the hyperparameter configs of all models into the data frame 32 | for config_file in sorted(hyperparameter_files): 33 | file_name_part = re.split(pattern=dataset_name + "_", string=config_file, maxsplit=1)[1] 34 | 35 | model_name = file_name_part.rsplit('_', 1)[0] 36 | 37 | print(model_name) 38 | 39 | hyperparameter_values_dic = read_optimal_hyperparameter_values(config_file) 40 | if "rate_of_learning" not in hyperparameter_values_dic.keys(): 41 | hyperparameter_values_dic["rate_of_learning"] = "-" 42 | 43 | hyperparameters_df.loc[-1] = [model_name, hyperparameter_values_dic["cell_dimension"], 44 | hyperparameter_values_dic["gaussian_noise_stdev"], 45 | hyperparameter_values_dic["l2_regularization"], 46 | hyperparameter_values_dic["max_epoch_size"], 47 | hyperparameter_values_dic["max_num_epochs"], 48 | hyperparameter_values_dic["minibatch_size"], 49 | hyperparameter_values_dic["num_hidden_layers"], 50 | hyperparameter_values_dic["random_normal_initializer_stdev"], 51 | hyperparameter_values_dic["rate_of_learning"]] 52 | hyperparameters_df.index = hyperparameters_df.index + 1 53 | 54 | # write the hyperparameter summary to a csv file 55 | hyperparameters_df.to_csv(output_file, index=False) 56 | -------------------------------------------------------------------------------- /src/LSTM-Models/utility_scripts/invoke_r_energy_DS.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | def invoke_r_script(args, moving_window, without_stl_decomposition): 4 | if moving_window: 5 | subprocess.call(["Rscript", "--vanilla", "error_calculator/moving_window/energy_DS_evaluation.R", args[0],args[1], args[2], args[3], args[4], args[5], args[6]]) 6 | else: 7 | if without_stl_decomposition: 8 | subprocess.call(["Rscript", "--vanilla", "error_calculator/non_moving_window/without_stl_decomposition/final_evaluation.R", args[0], args[1], args[2], args[3], args[4], args[5], args[6]]) 9 
| else: 10 | subprocess.call( 11 | ["Rscript", "--vanilla", "error_calculator/non_moving_window/with_stl_decomposition/final_evaluation.R", args[0], args[1], args[2], args[3], args[4], args[5], args[6]]) 12 | -------------------------------------------------------------------------------- /src/LSTM-Models/utility_scripts/invoke_r_energy_SE.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | def invoke_r_script(args, moving_window, without_stl_decomposition): 4 | if moving_window: 5 | subprocess.call(["Rscript", "--vanilla", "error_calculator/moving_window/energy_SE_evaluation.R", args[0],args[1], args[2], args[3], args[4], args[5], args[6]]) 6 | else: 7 | if without_stl_decomposition: 8 | subprocess.call(["Rscript", "--vanilla", "error_calculator/non_moving_window/without_stl_decomposition/final_evaluation.R", args[0], args[1], args[2], args[3], args[4], args[5], args[6]]) 9 | else: 10 | subprocess.call( 11 | ["Rscript", "--vanilla", "error_calculator/non_moving_window/with_stl_decomposition/final_evaluation.R", args[0], args[1], args[2], args[3], args[4], args[5], args[6]]) 12 | -------------------------------------------------------------------------------- /src/LSTM-Models/utility_scripts/persist_optimized_config_results.py: -------------------------------------------------------------------------------- 1 | def persist_results(results, file): 2 | file_object = open(file, mode = 'w') 3 | 4 | for k, v in results.items(): 5 | file_object.write(str(k) + ' >>> ' + str(v) + '\n\n') 6 | 7 | file_object.close() 8 | -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/Baseline/Mean_Moving_window/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Preprocessing-Scripts/Baseline/Mean_Moving_window/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/Baseline/energy_baseline_hourly_test.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | OUTPUT_DIR = "Mean_Moving_window" 7 | input_size = 24*1.25 8 | max_forecast_horizon <- 24 9 | seasonality_period_1 <- 24 10 | seasonality_period_2 <- 168 11 | seasonality_period_3 <- 8766 12 | 13 | start_time <- Sys.time() 14 | 15 | 16 | for (idr in 1 : nrow(df_train)) { 17 | print(idr) 18 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_baseline_test", sep = '/') 19 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 20 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 21 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 22 | 23 | time_series_data <- as.numeric(df_train[idr,]) 24 | time_series_mean <- mean(time_series_data) 25 | 26 | time_series_data <- time_series_data/(time_series_mean) 27 | 28 | time_series_log <- log(time_series_data + 1) 29 | time_series_length = length(time_series_log) 30 | 31 | input_windows = embed(time_series_log[1 : (time_series_length)], input_size)[, input_size : 1] 32 | 33 | meanvalues <- rowMeans(input_windows) 34 | input_windows <- input_windows - meanvalues 35 | 36 | sav_df = matrix(NA, ncol = (4 + input_size), nrow = nrow(input_windows)) 37 | sav_df = as.data.frame(sav_df) 38 | sav_df[, 1] = paste(idr - 1, '|i', sep = 
'') 39 | sav_df[, 2 : (input_size + 1)] = input_windows 40 | sav_df[, (input_size + 2)] = '|#' 41 | sav_df[, (input_size + 3)] = time_series_mean 42 | sav_df[, (input_size + 4)] = meanvalues 43 | 44 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 45 | } 46 | 47 | end_time <- Sys.time() 48 | 49 | print(paste0("Total time", (end_time - start_time))) 50 | -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/Baseline/energy_baseline_train_validation.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | OUTPUT_DIR = "Mean_Moving_window" 7 | input_size = 24*1.25 8 | max_forecast_horizon <- 24 9 | seasonality_period_1 <- 24 10 | seasonality_period_2 <- 168 11 | seasonality_period_3 <- 8766 12 | 13 | start_time <- Sys.time() 14 | 15 | for (validation in c(TRUE, FALSE)) { 16 | for (idr in 1 : nrow(df_train)) { 17 | print(idr) 18 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_baseline", sep = '/') 19 | 20 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 21 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 22 | if (validation) { 23 | OUTPUT_PATH = paste(OUTPUT_PATH, 'v', sep = '') 24 | } 25 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 26 | 27 | time_series_data <- as.numeric(df_train[idr,]) 28 | time_series_mean <- mean(time_series_data) 29 | 30 | time_series_data <- time_series_data/(time_series_mean) 31 | 32 | time_series_log <- log(time_series_data + 1) 33 | time_series_length = length(time_series_log) 34 | 35 | if (! validation) { 36 | time_series_length = time_series_length - max_forecast_horizon 37 | time_series_log = time_series_log[1 : time_series_length] 38 | } 39 | 40 | input_windows = embed(time_series_log[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 41 | output_windows = embed(time_series_log[-(1:input_size)], max_forecast_horizon)[, max_forecast_horizon : 1] 42 | 43 | meanvalues <- rowMeans(input_windows) 44 | input_windows <- input_windows - meanvalues 45 | output_windows <- output_windows -meanvalues 46 | 47 | if (validation) { 48 | sav_df = matrix(NA, ncol = (5 + input_size + max_forecast_horizon), nrow = nrow(input_windows)) 49 | sav_df = as.data.frame(sav_df) 50 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 51 | sav_df[, 2 : (input_size + 1)] = input_windows 52 | sav_df[, (input_size + 2)] = '|o' 53 | sav_df[, (input_size + 3):(input_size + max_forecast_horizon +2)] = output_windows 54 | sav_df[, (input_size + max_forecast_horizon + 3)] = '|#' 55 | sav_df[, (input_size + max_forecast_horizon + 4)] = time_series_mean 56 | sav_df[, (input_size + max_forecast_horizon + 5)] = meanvalues 57 | }else { 58 | sav_df = matrix(NA, ncol = (input_size + 2 + max_forecast_horizon), nrow = nrow(input_windows)) 59 | sav_df = as.data.frame(sav_df) 60 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 61 | sav_df[, 2 : (input_size + 1)] = input_windows 62 | sav_df[, (input_size + 2)] = '|o' 63 | sav_df[, (input_size + 3):(input_size + max_forecast_horizon +2)] = output_windows 64 | } 65 | 66 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 67 | } 68 | } 69 | 70 | end_time <- Sys.time() 71 | 72 | print(paste0("Total time", (end_time - start_time))) 73 | 
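The baseline scripts above emit one moving window per row in a space-separated text format, which is what the TFRecordWriter classes earlier in the repository parse by column position. A sketch of the row layouts implied by the code, with the defaults used here (input_size = 30, max_forecast_horizon = 24):

#   train row:       "<id>|i" x1 ... x30 "|o" y1 ... y24
#   validation row:  "<id>|i" x1 ... x30 "|o" y1 ... y24 "|#" series_mean window_mean
#   test row:        "<id>|i" x1 ... x30 "|#" series_mean window_mean

The "<id>|i" token doubles as the series key: read_text_data() renames column 0 to 'series' and groups rows with pd.unique(), and the "|o"/"|#" delimiter columns are why the readers skip column input_size + 1 and start the outputs at input_size + 2. The two metadata values per row also match the FixedLenSequenceFeature([2]) declared in the validation and test parsers.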
-------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-MSTL-DS/Mean_Moving_window/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Preprocessing-Scripts/DS/LSTM-MSTL-DS/Mean_Moving_window/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-MSTL-DS/energy_mstl_test.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | OUTPUT_DIR = "Mean_Moving_window" 7 | input_size = 24*1.25 8 | max_forecast_horizon <- 24 9 | seasonality_period_1 <- 24 10 | seasonality_period_2 <- 168 11 | seasonality_period_3 <- 8766 12 | 13 | start_time <- Sys.time() 14 | 15 | for (idr in 1 : nrow(df_train)) { 16 | print(idr) 17 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_mstl_test", sep = '/') 18 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 19 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 20 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 21 | 22 | time_series_data <- as.numeric(df_train[idr,]) 23 | time_series_mean <- mean(time_series_data) 24 | 25 | time_series_data <- time_series_data/(time_series_mean) 26 | 27 | time_series_log <- log(time_series_data +1) 28 | time_series_length = length(time_series_log) 29 | 30 | stl_result = tryCatch({ 31 | sstl = mstl(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2, seasonality_period_3)), s.window = "period") 32 | seasonal_vect_1 = as.numeric(sstl[, 3]) 33 | seasonal_vect_2 = as.numeric(sstl[, 4]) 34 | seasonal_vect_3 = as.numeric(sstl[, 5]) 35 | levels_vect = as.numeric(sstl[, 2]) 36 | values_vect = as.numeric(sstl[, 2] + sstl[, 6]) 37 | cbind(seasonal_vect_1,seasonal_vect_2,seasonal_vect_3,levels_vect, values_vect) 38 | }, error = function(e) { 39 | seasonal_vect_1 = rep(0, length(time_series_length)) 40 | seasonal_vect_2 = rep(0, length(time_series_length)) 41 | seasonal_vect_3 = rep(0, length(time_series_length)) 42 | levels_vect = time_series_log 43 | values_vect = time_series_log 44 | cbind(seasonal_vect_1, seasonal_vect_2,seasonal_vect_3,levels_vect, values_vect) 45 | }) 46 | 47 | comp <- (sstl) 48 | periods <- c(24,168,8766) 49 | h <- max_forecast_horizon 50 | n <- time_series_length 51 | 52 | seasComps <- matrix(NA, nrow=h, ncol=length(periods)) 53 | seasonality = tryCatch({ 54 | for(i in 1:length(periods)) { 55 | m <- periods[i] 56 | seasComps[,i] <- rep(comp[n-(m:1)+1,2+i],trunc(1+(h-1)/m))[1:h] 57 | } 58 | seasComp <- rowSums(seasComps) 59 | seasonality_vector = (seasComp) 60 | c(seasonality_vector) 61 | }, error = function(e) { 62 | seasonality_vector = rep(0, max_forecast_horizon) #stl() may fail, and then we would go on with the seasonality vector=0 63 | c(seasonality_vector) 64 | }) 65 | 66 | input_windows = embed(stl_result[1 : time_series_length , 5], input_size)[, input_size : 1] 67 | level_values = stl_result[input_size : time_series_length, 4] 68 | input_windows = input_windows - level_values 69 | 70 | 71 | sav_df = matrix(NA, ncol = (4 + input_size + max_forecast_horizon), nrow = length(level_values)) 72 | sav_df = as.data.frame(sav_df) 73 | 74 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 75 | sav_df[, 2 : (input_size + 1)] = 
input_windows 76 | 77 | sav_df[, (input_size + 2)] = '|#' 78 | sav_df[, (input_size + 3)] = time_series_mean 79 | sav_df[, (input_size + 4)] = level_values 80 | 81 | seasonality_windows = matrix(rep(t(seasonality),each=length(level_values)),nrow=length(level_values)) 82 | sav_df[(input_size + 5) : ncol(sav_df)] = seasonality_windows 83 | 84 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 85 | } 86 | 87 | end_time <- Sys.time() 88 | 89 | print(paste0("Total time", (end_time - start_time))) 90 | 91 | 92 | -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-MSTL-DS/energy_mstl_train_validation.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | 7 | OUTPUT_DIR = "Mean_Moving_window" 8 | input_size = 24*1.25 9 | max_forecast_horizon <- 24 10 | seasonality_period_1 <- 24 11 | seasonality_period_2 <- 168 12 | seasonality_period_3 <- 8766 13 | 14 | start_time <- Sys.time() 15 | 16 | for (validation in c(TRUE, FALSE)) { 17 | for (idr in 1 : nrow(df_train)) { 18 | print(idr) 19 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_mstl", sep = '/') 20 | 21 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 22 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 23 | if (validation) { 24 | OUTPUT_PATH = paste(OUTPUT_PATH, 'v', sep = '') 25 | } 26 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 27 | 28 | time_series_data <- as.numeric(df_train[idr,]) 29 | 30 | time_series_mean <- mean(time_series_data) 31 | 32 | time_series_data <- time_series_data/(time_series_mean) 33 | 34 | time_series_log <- log(time_series_data +1) 35 | time_series_length = length(time_series_log) 36 | 37 | if (! 
validation) { 38 | time_series_length = time_series_length - max_forecast_horizon 39 | time_series_log = time_series_log[1 : time_series_length] 40 | } 41 | # apply stl 42 | stl_result = tryCatch({ 43 | sstl = mstl(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2, seasonality_period_3)), s.window = "period") 44 | seasonal_vect = as.numeric(sstl[, 3]) + as.numeric(sstl[, 4]) + as.numeric(sstl[, 5]) 45 | levels_vect = as.numeric(sstl[, 2]) 46 | values_vect = as.numeric(sstl[, 2] + sstl[, 6]) 47 | cbind(seasonal_vect, levels_vect, values_vect) 48 | }, error = function(e) { 49 | seasonal_vect = rep(0, length(time_series_length)) 50 | levels_vect = time_series_log 51 | values_vect = time_series_log 52 | cbind(seasonal_vect, levels_vect, values_vect) 53 | }) 54 | 55 | input_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 3], input_size)[, input_size : 1] 56 | output_windows = embed(stl_result[- (1 : input_size) , 3], max_forecast_horizon)[, max_forecast_horizon : 1] 57 | level_values = stl_result[input_size : (time_series_length - max_forecast_horizon), 2] 58 | input_windows = input_windows - level_values 59 | output_windows = output_windows - level_values 60 | if (validation) { 61 | # create the seasonality metadata 62 | seasonality_windows = embed(stl_result[- (1 : input_size) , 1], max_forecast_horizon)[, max_forecast_horizon : 1] 63 | sav_df = matrix(NA, ncol = (5 + input_size + max_forecast_horizon * 2), nrow = length(level_values)) 64 | sav_df = as.data.frame(sav_df) 65 | sav_df[, (input_size + max_forecast_horizon + 3)] = '|#' 66 | sav_df[, (input_size + max_forecast_horizon + 4)] = time_series_mean 67 | sav_df[, (input_size + max_forecast_horizon + 5)] = level_values 68 | sav_df[, (input_size + max_forecast_horizon + 6) : ncol(sav_df)] = seasonality_windows 69 | }else { 70 | sav_df = matrix(NA, ncol = (2 + input_size + max_forecast_horizon), nrow = length(level_values)) 71 | sav_df = as.data.frame(sav_df) 72 | } 73 | 74 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 75 | sav_df[, 2 : (input_size + 1)] = input_windows 76 | 77 | sav_df[, (input_size + 2)] = '|o' 78 | sav_df[, (input_size + 3) : (input_size + max_forecast_horizon + 2)] = output_windows 79 | 80 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 81 | } 82 | } 83 | 84 | end_time <- Sys.time() 85 | 86 | print(paste0("Total time", (end_time - start_time))) -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-MSTL7-DS/Mean_Moving_window/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Preprocessing-Scripts/DS/LSTM-MSTL7-DS/Mean_Moving_window/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-MSTL7-DS/energy_mstl7_test.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | OUTPUT_DIR = "Mean_Moving_window" 7 | input_size = 24*1.25 8 | max_forecast_horizon <- 24 9 | seasonality_period_1 <- 24 10 | seasonality_period_2 <- 168 11 | seasonality_period_3 <- 8766 12 | 13 | start_time <- Sys.time() 14 | 15 | for (idr in 1 : nrow(df_train)) { 16 | print(idr) 17 | OUTPUT_PATH 
= paste(OUTPUT_DIR, "energy_mstl7_test", sep = '/') 18 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 19 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 20 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 21 | 22 | time_series_data <- as.numeric(df_train[idr,]) 23 | time_series_mean <- mean(time_series_data) 24 | 25 | time_series_data <- time_series_data/(time_series_mean) 26 | 27 | time_series_log <- log(time_series_data +1) 28 | time_series_length = length(time_series_log) 29 | 30 | stl_result = tryCatch({ 31 | sstl = mstl(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2, seasonality_period_3)), s.window = 7) 32 | seasonal_vect_1 = as.numeric(sstl[, 3]) 33 | seasonal_vect_2 = as.numeric(sstl[, 4]) 34 | seasonal_vect_3 = as.numeric(sstl[, 5]) 35 | levels_vect = as.numeric(sstl[, 2]) 36 | values_vect = as.numeric(sstl[, 2] + sstl[, 6]) 37 | cbind(seasonal_vect_1,seasonal_vect_2,seasonal_vect_3,levels_vect, values_vect) 38 | }, error = function(e) { 39 | seasonal_vect_1 = rep(0, length(time_series_length)) 40 | seasonal_vect_2 = rep(0, length(time_series_length)) 41 | seasonal_vect_3 = rep(0, length(time_series_length)) 42 | levels_vect = time_series_log 43 | values_vect = time_series_log 44 | cbind(seasonal_vect_1, seasonal_vect_2,seasonal_vect_3,levels_vect, values_vect) 45 | }) 46 | 47 | comp <- (sstl) 48 | periods <- c(24,168,8766) 49 | h <- max_forecast_horizon 50 | n <- time_series_length 51 | 52 | seasComps <- matrix(NA, nrow=h, ncol=length(periods)) 53 | seasonality = tryCatch({ 54 | for(i in 1:length(periods)) { 55 | m <- periods[i] 56 | seasComps[,i] <- rep(comp[n-(m:1)+1,2+i],trunc(1+(h-1)/m))[1:h] 57 | } 58 | seasComp <- rowSums(seasComps) 59 | seasonality_vector = (seasComp) 60 | c(seasonality_vector) 61 | }, error = function(e) { 62 | seasonality_vector = rep(0, max_forecast_horizon) #stl() may fail, and then we would go on with the seasonality vector=0 63 | c(seasonality_vector) 64 | }) 65 | 66 | input_windows = embed(stl_result[1 : time_series_length , 5], input_size)[, input_size : 1] 67 | level_values = stl_result[input_size : time_series_length, 4] 68 | input_windows = input_windows - level_values 69 | 70 | 71 | sav_df = matrix(NA, ncol = (4 + input_size + max_forecast_horizon), nrow = length(level_values)) 72 | sav_df = as.data.frame(sav_df) 73 | 74 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 75 | sav_df[, 2 : (input_size + 1)] = input_windows 76 | 77 | sav_df[, (input_size + 2)] = '|#' 78 | sav_df[, (input_size + 3)] = time_series_mean 79 | sav_df[, (input_size + 4)] = level_values 80 | 81 | seasonality_windows = matrix(rep(t(seasonality),each=length(level_values)),nrow=length(level_values)) 82 | sav_df[(input_size + 5) : ncol(sav_df)] = seasonality_windows 83 | 84 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 85 | } 86 | 87 | end_time <- Sys.time() 88 | 89 | print(paste0("Total time", (end_time - start_time))) 90 | 91 | 92 | -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-MSTL7-DS/energy_mstl7_train_validation.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | 7 | OUTPUT_DIR = "Mean_Moving_window" 8 | input_size = 24*1.25 9 | max_forecast_horizon <- 24 10 | seasonality_period_1 <- 24 11 | seasonality_period_2 <- 168 
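# the periods are in hours: 24 = daily, 168 = 7 * 24 = weekly; the next, 8766 = 365.25 * 24, is the average length of a year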
12 | seasonality_period_3 <- 8766 13 | 14 | start_time <- Sys.time() 15 | 16 | for (validation in c(TRUE, FALSE)) { 17 | for (idr in 1 : nrow(df_train)) { 18 | print(idr) 19 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_mstl7", sep = '/') 20 | 21 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 22 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 23 | if (validation) { 24 | OUTPUT_PATH = paste(OUTPUT_PATH, 'v', sep = '') 25 | } 26 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 27 | 28 | time_series_data <- as.numeric(df_train[idr,]) 29 | 30 | time_series_mean <- mean(time_series_data) 31 | 32 | time_series_data <- time_series_data/(time_series_mean) 33 | 34 | time_series_log <- log(time_series_data +1) 35 | time_series_length = length(time_series_log) 36 | 37 | if (! validation) { 38 | time_series_length = time_series_length - max_forecast_horizon 39 | time_series_log = time_series_log[1 : time_series_length] 40 | } 41 | # apply stl 42 | stl_result = tryCatch({ 43 | sstl = mstl(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2, seasonality_period_3)), s.window = 7) 44 | seasonal_vect = as.numeric(sstl[, 3]) + as.numeric(sstl[, 4]) + as.numeric(sstl[, 5]) 45 | levels_vect = as.numeric(sstl[, 2]) 46 | values_vect = as.numeric(sstl[, 2] + sstl[, 6]) 47 | cbind(seasonal_vect, levels_vect, values_vect) 48 | }, error = function(e) { 49 | seasonal_vect = rep(0, length(time_series_length)) 50 | levels_vect = time_series_log 51 | values_vect = time_series_log 52 | cbind(seasonal_vect, levels_vect, values_vect) 53 | }) 54 | 55 | input_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 3], input_size)[, input_size : 1] 56 | output_windows = embed(stl_result[- (1 : input_size) , 3], max_forecast_horizon)[, max_forecast_horizon : 1] 57 | level_values = stl_result[input_size : (time_series_length - max_forecast_horizon), 2] 58 | input_windows = input_windows - level_values 59 | output_windows = output_windows - level_values 60 | if (validation) { 61 | # create the seasonality metadata 62 | seasonality_windows = embed(stl_result[- (1 : input_size) , 1], max_forecast_horizon)[, max_forecast_horizon : 1] 63 | sav_df = matrix(NA, ncol = (5 + input_size + max_forecast_horizon * 2), nrow = length(level_values)) 64 | sav_df = as.data.frame(sav_df) 65 | sav_df[, (input_size + max_forecast_horizon + 3)] = '|#' 66 | sav_df[, (input_size + max_forecast_horizon + 4)] = time_series_mean 67 | sav_df[, (input_size + max_forecast_horizon + 5)] = level_values 68 | sav_df[, (input_size + max_forecast_horizon + 6) : ncol(sav_df)] = seasonality_windows 69 | }else { 70 | sav_df = matrix(NA, ncol = (2 + input_size + max_forecast_horizon), nrow = length(level_values)) 71 | sav_df = as.data.frame(sav_df) 72 | } 73 | 74 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 75 | sav_df[, 2 : (input_size + 1)] = input_windows 76 | 77 | sav_df[, (input_size + 2)] = '|o' 78 | sav_df[, (input_size + 3) : (input_size + max_forecast_horizon + 2)] = output_windows 79 | 80 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 81 | } 82 | } 83 | 84 | end_time <- Sys.time() 85 | 86 | print(paste0("Total time", (end_time - start_time))) -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-Prophet-DS/Mean_Moving_window/empty_commit.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Preprocessing-Scripts/DS/LSTM-Prophet-DS/Mean_Moving_window/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-Prophet-DS/energy_prophet_test.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | library(xts) 3 | library(prophet) 4 | set.seed(1234) 5 | 6 | df_train <- read.csv("solar_train.txt", header = FALSE) 7 | 8 | OUTPUT_DIR = "Mean_Moving_window" 9 | input_size = 24*1.25 10 | max_forecast_horizon <- 24 11 | seasonality_period_1 <- 24 12 | seasonality_period_2 <- 168 13 | seasonality_period_3 <- 8766 14 | 15 | start_time <- Sys.time() 16 | 17 | for (idr in 1 : nrow(df_train)) { 18 | print(idr) 19 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_prophet_test", sep = '/') 20 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 21 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 22 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 23 | 24 | time_series_data <- as.numeric(df_train[idr,]) 25 | time_series_mean <- mean(time_series_data) 26 | 27 | time_series_data <- time_series_data/(time_series_mean) 28 | 29 | time_series_log <- log(time_series_data + 1) 30 | time_series_length = length(time_series_log) 31 | 32 | ts <- seq(from = as.POSIXct("2010-01-01 00:00"), length.out = time_series_length, by = "hour") 33 | history <- data.frame(ds = ts, y = time_series_log) 34 | 35 | # apply stl 36 | stl_result = tryCatch({ 37 | sstl = prophet(history, daily.seasonality = TRUE, weekly.seasonality = TRUE, yearly.seasonality = TRUE) 38 | sstl_comp = predict(sstl) 39 | seasonal_vect = sstl_comp$daily + sstl_comp$weekly + sstl_comp$yearly 40 | levels_vect = sstl_comp$trend 41 | residuals = (time_series_log - sstl_comp$yhat) 42 | values_vect = residuals + (levels_vect) 43 | cbind(seasonal_vect, levels_vect, values_vect) 44 | }, error = function(e) { 45 | seasonal_vect = rep(0, length(time_series_length)) 46 | levels_vect = time_series_log 47 | values_vect = time_series_log 48 | cbind(seasonal_vect, levels_vect, values_vect) 49 | }) 50 | 51 | future_start <- ts[time_series_length] + 3600 52 | 53 | future <- data.frame(ds = seq(from = future_start , length.out = 24, by = "hour")) 54 | m <- prophet(history, daily.seasonality = TRUE, weekly.seasonality = TRUE, yearly.seasonality = TRUE) 55 | forecast_prophet <- predict(m,future) 56 | 57 | seasonality <- forecast_prophet$daily + forecast_prophet$weekly + forecast_prophet$yearly 58 | 59 | input_windows = embed(stl_result[1 : time_series_length , 3], input_size)[, input_size : 1] 60 | level_values = stl_result[input_size : time_series_length, 2] 61 | input_windows = input_windows - level_values 62 | 63 | sav_df = matrix(NA, ncol = (4 + input_size + max_forecast_horizon), nrow = length(level_values)) 64 | sav_df = as.data.frame(sav_df) 65 | 66 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 67 | sav_df[, 2 : (input_size + 1)] = input_windows 68 | 69 | sav_df[, (input_size + 2)] = '|#' 70 | sav_df[, (input_size + 3)] = time_series_mean 71 | sav_df[, (input_size + 4)] = level_values 72 | 73 | seasonality_windows = matrix(rep(t(seasonality),each=length(level_values)),nrow=length(level_values)) 74 | sav_df[(input_size + 5) : ncol(sav_df)] = seasonality_windows 75 | 76 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 77 | } 78 | 79 | end_time <- 
Sys.time() 80 | print(paste0("Total time", (end_time - start_time))) 81 | -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-Prophet-DS/energy_prophet_train_validation.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | library(xts) 3 | library(prophet) 4 | set.seed(1234) 5 | 6 | df_train <- read.csv("solar_train.txt", header = FALSE) 7 | 8 | OUTPUT_DIR = "Mean_Moving_window" 9 | input_size = 24*1.25 10 | #time_series_data <- as.numeric(hourly_M4[[1]]$x) 11 | #time_series_data <- log(time_series_data) 12 | max_forecast_horizon <- 24 13 | seasonality_period_1 <- 24 14 | seasonality_period_2 <- 168 15 | seasonality_period_3 <- 8766 16 | 17 | start_time <- Sys.time() 18 | 19 | for (validation in c(TRUE, FALSE)) { 20 | for (idr in 1 : nrow(df_train)) { 21 | print(idr) 22 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_prophet", sep = '/') 23 | 24 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 25 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 26 | if (validation) { 27 | OUTPUT_PATH = paste(OUTPUT_PATH, 'v', sep = '') 28 | } 29 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 30 | 31 | time_series_data <- as.numeric(df_train[idr,]) 32 | time_series_mean <- mean(time_series_data) 33 | 34 | time_series_data <- time_series_data/(time_series_mean) 35 | 36 | time_series_log <- log(time_series_data + 1) 37 | time_series_length = length(time_series_log) 38 | 39 | if (! validation) { 40 | time_series_length = time_series_length - max_forecast_horizon 41 | time_series_log = time_series_log[1 : time_series_length] 42 | } 43 | 44 | ts <- seq(from = as.POSIXct("2010-01-01 00:00"), length.out = time_series_length, by = "hour") 45 | history <- data.frame(ds = ts, y = time_series_log) 46 | 47 | # apply stl 48 | stl_result = tryCatch({ 49 | sstl = prophet(history, daily.seasonality = TRUE, weekly.seasonality = TRUE, yearly.seasonality = TRUE) 50 | sstl_comp = predict(sstl) 51 | seasonal_vect = sstl_comp$daily + sstl_comp$weekly + sstl_comp$yearly 52 | levels_vect = sstl_comp$trend 53 | residuals = (time_series_log - sstl_comp$yhat) 54 | values_vect = residuals + (levels_vect) 55 | cbind(seasonal_vect, levels_vect, values_vect) 56 | }, error = function(e) { 57 | seasonal_vect = rep(0, length(time_series_length)) 58 | levels_vect = time_series_log 59 | values_vect = time_series_log 60 | cbind(seasonal_vect, levels_vect, values_vect) 61 | }) 62 | 63 | input_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 3], input_size)[, input_size : 1] 64 | output_windows = embed(stl_result[- (1 : input_size) , 3], max_forecast_horizon)[, max_forecast_horizon : 1] 65 | level_values = stl_result[input_size : (time_series_length - max_forecast_horizon), 2] 66 | input_windows = input_windows - level_values 67 | output_windows = output_windows - level_values 68 | if (validation) { 69 | # create the seasonality metadata 70 | seasonality_windows = embed(stl_result[- (1 : input_size) , 1], max_forecast_horizon)[, max_forecast_horizon : 1] 71 | sav_df = matrix(NA, ncol = (5 + input_size + max_forecast_horizon * 2), nrow = length(level_values)) 72 | sav_df = as.data.frame(sav_df) 73 | sav_df[, (input_size + max_forecast_horizon + 3)] = '|#' 74 | sav_df[, (input_size + max_forecast_horizon + 4)] = time_series_mean 75 | sav_df[, (input_size + max_forecast_horizon + 5)] = level_values 76 | sav_df[, (input_size + max_forecast_horizon + 6) : ncol(sav_df)] = 
seasonality_windows 77 | }else { 78 | sav_df = matrix(NA, ncol = (2 + input_size + max_forecast_horizon), nrow = length(level_values)) 79 | sav_df = as.data.frame(sav_df) 80 | } 81 | 82 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 83 | sav_df[, 2 : (input_size + 1)] = input_windows 84 | 85 | sav_df[, (input_size + 2)] = '|o' 86 | sav_df[, (input_size + 3) : (input_size + max_forecast_horizon + 2)] = output_windows 87 | 88 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 89 | } 90 | } 91 | 92 | end_time <- Sys.time() 93 | 94 | print(paste0("Total time", (end_time - start_time))) -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-Tbats-DS/Mean_Moving_window/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Preprocessing-Scripts/DS/LSTM-Tbats-DS/Mean_Moving_window/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-Tbats-DS/energy_tbats_test.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | 7 | OUTPUT_DIR = "Mean_Moving_window" 8 | input_size = 24*1.25 9 | max_forecast_horizon <- 24 10 | seasonality_period_1 <- 24 11 | seasonality_period_2 <- 168 12 | seasonality_period_3 <- 8766 13 | 14 | 15 | start_time <- Sys.time() 16 | 17 | for (idr in 1 : nrow(df_train)) { 18 | print(idr) 19 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_tbats_test", sep = '/') 20 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 21 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 22 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 23 | 24 | time_series_data <- as.numeric(df_train[idr,]) 25 | time_series_mean <- mean(time_series_data) 26 | 27 | time_series_data <- time_series_data/(time_series_mean) 28 | time_series_log <- log(time_series_data +1) 29 | time_series_length = length(time_series_log) 30 | 31 | stl_result = tryCatch({ 32 | sstl = tbats(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2,seasonality_period_3))) 33 | sstl_comp = tbats.components(sstl) 34 | seasonal_vect = as.numeric(sstl_comp[, 'season1']) + as.numeric(sstl_comp[, 'season2']) + as.numeric(sstl_comp[, 'season3']) 35 | levels_vect = as.numeric(sstl_comp[, 'level']) 36 | values_vect = sstl$errors + levels_vect 37 | cbind(seasonal_vect, levels_vect, values_vect) 38 | }, error = function(e) { 39 | seasonal_vect = rep(0, length(time_series_length)) 40 | levels_vect = time_series_log 41 | values_vect = time_series_log 42 | sstl_comp = time_series_log 43 | cbind(seasonal_vect, levels_vect, values_vect) 44 | }) 45 | 46 | comp <- tbats.components(sstl) 47 | 48 | periods <- c(24,168,8766) 49 | h <- max_forecast_horizon 50 | n <- time_series_length 51 | 52 | seasComps <- matrix(NA, nrow=h, ncol=length(periods)) 53 | 54 | seasonality = tryCatch({ 55 | seasComps[,1] <- rep(comp[n-(24:1)+1,'season1'],trunc(1+(h-1)/24))[1:h] 56 | seasComps[,2] <- rep(comp[n-(168:1)+1,'season2'],trunc(1+(h-1)/168))[1:h] 57 | seasComps[,3] <- rep(comp[n-(8766:1)+1,'season3'],trunc(1+(h-1)/8766))[1:h] 58 | seasComp <- rowSums(seasComps) 59 | seasonality_vector = (seasComp) 60 | c(seasonality_vector) 61 | }, 
error = function(e) { 62 | seasonality_vector = rep(0, max_forecast_horizon) #stl() may fail, and then we would go on with the seasonality vector=0 63 | c(seasonality_vector) 64 | }) 65 | 66 | input_windows = embed(stl_result[1 : time_series_length , 3], input_size)[, input_size : 1] 67 | level_values = stl_result[input_size : time_series_length, 2] 68 | input_windows = input_windows - level_values 69 | 70 | 71 | sav_df = matrix(NA, ncol = (4 + input_size + max_forecast_horizon), nrow = length(level_values)) 72 | sav_df = as.data.frame(sav_df) 73 | 74 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 75 | sav_df[, 2 : (input_size + 1)] = input_windows 76 | 77 | sav_df[, (input_size + 2)] = '|#' 78 | sav_df[, (input_size + 3)] = time_series_mean 79 | sav_df[, (input_size + 4)] = level_values 80 | 81 | seasonality_windows = matrix(rep(t(seasonality),each=length(level_values)),nrow=length(level_values)) 82 | sav_df[(input_size + 5) : ncol(sav_df)] = seasonality_windows 83 | 84 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 85 | } 86 | 87 | end_time <- Sys.time() 88 | 89 | print(paste0("Total time", (end_time - start_time))) -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/DS/LSTM-Tbats-DS/energy_tbats_train_validation.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | 7 | OUTPUT_DIR = "Mean_Moving_window" 8 | input_size = 24*1.25 9 | max_forecast_horizon <- 24 10 | seasonality_period_1 <- 24 11 | seasonality_period_2 <- 168 12 | seasonality_period_3 <- 8766 13 | 14 | start_time <- Sys.time() 15 | 16 | for (validation in c(TRUE, FALSE)) { 17 | for (idr in 1 : nrow(df_train)) { 18 | print(idr) 19 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_tbats", sep = '/') 20 | 21 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 22 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 23 | if (validation) { 24 | OUTPUT_PATH = paste(OUTPUT_PATH, 'v', sep = '') 25 | } 26 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 27 | 28 | time_series_data <- as.numeric(df_train[idr,]) 29 | time_series_mean <- mean(time_series_data) 30 | 31 | time_series_data <- time_series_data/(time_series_mean) 32 | 33 | time_series_log <- log(time_series_data +1) 34 | time_series_length = length(time_series_log) 35 | 36 | if (! 
validation) { 37 | time_series_length = time_series_length - max_forecast_horizon 38 | time_series_log = time_series_log[1 : time_series_length] 39 | } 40 | 41 | # apply stl 42 | stl_result = tryCatch({ 43 | sstl = tbats(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2, seasonality_period_3))) 44 | sstl_comp = tbats.components(sstl) 45 | seasonal_vect = as.numeric(sstl_comp[, 'season1']) + as.numeric(sstl_comp[, 'season2']) + as.numeric(sstl_comp[, 'season3']) 46 | levels_vect = as.numeric(sstl_comp[, 'level']) 47 | values_vect = sstl$errors + levels_vect 48 | cbind(seasonal_vect, levels_vect, values_vect) 49 | }, error = function(e) { 50 | seasonal_vect = rep(0, length(time_series_length)) 51 | levels_vect = time_series_log 52 | values_vect = time_series_log 53 | cbind(seasonal_vect, levels_vect, values_vect) 54 | }) 55 | 56 | input_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 3], input_size)[, input_size : 1] 57 | output_windows = embed(stl_result[- (1 : input_size) , 3], max_forecast_horizon)[, max_forecast_horizon : 1] 58 | level_values = stl_result[input_size : (time_series_length - max_forecast_horizon), 2] 59 | input_windows = input_windows - level_values 60 | output_windows = output_windows - level_values 61 | if (validation) { 62 | # create the seasonality metadata 63 | seasonality_windows = embed(stl_result[- (1 : input_size) , 1], max_forecast_horizon)[, max_forecast_horizon : 1] 64 | sav_df = matrix(NA, ncol = (5 + input_size + max_forecast_horizon * 2), nrow = length(level_values)) 65 | sav_df = as.data.frame(sav_df) 66 | sav_df[, (input_size + max_forecast_horizon + 3)] = '|#' 67 | sav_df[, (input_size + max_forecast_horizon + 4)] = time_series_mean 68 | sav_df[, (input_size + max_forecast_horizon + 5)] = level_values 69 | sav_df[, (input_size + max_forecast_horizon + 6) : ncol(sav_df)] = seasonality_windows 70 | }else { 71 | sav_df = matrix(NA, ncol = (2 + input_size + max_forecast_horizon), nrow = length(level_values)) 72 | sav_df = as.data.frame(sav_df) 73 | } 74 | 75 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 76 | sav_df[, 2 : (input_size + 1)] = input_windows 77 | 78 | sav_df[, (input_size + 2)] = '|o' 79 | sav_df[, (input_size + 3) : (input_size + max_forecast_horizon + 2)] = output_windows 80 | 81 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 82 | } 83 | } 84 | 85 | end_time <- Sys.time() 86 | 87 | print(paste0("Total time", (end_time - start_time))) -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Fourier-SE-1/Mean_Moving_window/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Preprocessing-Scripts/SE/LSTM-Fourier-SE-1/Mean_Moving_window/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Fourier-SE-1/energy_fourierk1_test.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | OUTPUT_DIR = "Mean_Moving_window" 7 | input_size = 24*1.25 8 | max_forecast_horizon <- 24 9 | seasonality_period_1 <- 24 10 | seasonality_period_2 <- 168 11 | seasonality_period_3 <- 8766 12 | 13 | start_time <- 
Sys.time() 14 | 15 | for (idr in 1 : nrow(df_train)) { 16 | print(idr) 17 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_fourierk1_test", sep = '/') 18 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 19 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 20 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 21 | 22 | time_series_data <- as.numeric(df_train[idr,]) 23 | time_series_mean <- mean(time_series_data) 24 | 25 | time_series_data <- time_series_data/(time_series_mean) 26 | 27 | time_series_log <- log(time_series_data + 1) 28 | time_series_length = length(time_series_log) 29 | 30 | n <- time_series_length 31 | freq1 <- 24 32 | freq2 <- 168 33 | freq3 <- 8766 34 | 35 | xs_1 <- seq(0, 2*pi, length=freq1+1) 36 | xs_2 <- seq(0, 2*pi, length=freq2+1) 37 | xs_3 <- seq(0, 2*pi, length=freq3+1) 38 | 39 | xsrep_1 <- rep(xs_1[-length(xs_1)], length=n) 40 | xsrep_2 <- rep(xs_2[-length(xs_2)], length=n) 41 | xsrep_3 <- rep(xs_3[-length(xs_3)], length=n) 42 | 43 | sin1 <- sin(xsrep_1) 44 | sin2 <- sin(xsrep_2) 45 | sin3 <- sin(xsrep_3) 46 | 47 | cos1 <- cos(xsrep_1) 48 | cos2 <- cos(xsrep_2) 49 | cos3 <- cos(xsrep_3) 50 | 51 | seasonality_sin1 <- sin1 52 | seasonality_sin2 <- sin2 53 | seasonality_sin3 <- sin3 54 | 55 | seasonality_cos1 <- cos1 56 | seasonality_cos2 <- cos2 57 | seasonality_cos3 <- cos3 58 | 59 | input_windows = embed(time_series_log[1 : (time_series_length)], input_size)[, input_size : 1] 60 | seasonality_sin1_windows = embed(seasonality_sin1[1 : (time_series_length)], input_size)[, input_size : 1] 61 | seasonality_sin2_windows = embed(seasonality_sin2[1 : (time_series_length)], input_size)[, input_size : 1] 62 | seasonality_sin3_windows = embed(seasonality_sin3[1 : (time_series_length)], input_size)[, input_size : 1] 63 | seasonality_cos1_windows = embed(seasonality_cos1[1 : (time_series_length)], input_size)[, input_size : 1] 64 | seasonality_cos2_windows = embed(seasonality_cos2[1 : (time_series_length)], input_size)[, input_size : 1] 65 | seasonality_cos3_windows = embed(seasonality_cos3[1 : (time_series_length)], input_size)[, input_size : 1] 66 | 67 | seasonality_sin1_windows = seasonality_sin1_windows[, c(30)] 68 | seasonality_sin2_windows = seasonality_sin2_windows[, c(30)] 69 | seasonality_sin3_windows = seasonality_sin3_windows[, c(30)] 70 | 71 | seasonality_cos1_windows = seasonality_cos1_windows[, c(30)] 72 | seasonality_cos2_windows = seasonality_cos2_windows[, c(30)] 73 | seasonality_cos3_windows = seasonality_cos3_windows[, c(30)] 74 | 75 | meanvalues <- rowMeans(input_windows) 76 | input_windows <- input_windows - meanvalues 77 | 78 | sav_df = matrix(NA, ncol = (6 + input_size + 4), nrow = nrow(input_windows)) 79 | sav_df = as.data.frame(sav_df) 80 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 81 | sav_df[, 2] = seasonality_sin1_windows 82 | sav_df[, 3] = seasonality_sin2_windows 83 | sav_df[, 4] = seasonality_sin3_windows 84 | sav_df[, 5] = seasonality_cos1_windows 85 | sav_df[, 6] = seasonality_cos2_windows 86 | sav_df[, 7] = seasonality_cos3_windows 87 | sav_df[, 8 : (input_size + 6 + 1)] = input_windows 88 | sav_df[, (input_size + 6 + 2)] = '|#' 89 | sav_df[, (input_size + 6 + 3)] = time_series_mean 90 | sav_df[, (input_size + 6 + 4)] = meanvalues 91 | 92 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 93 | } 94 | 95 | end_time <- Sys.time() 96 | 97 | print(paste0("Total time", (end_time - start_time))) 
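The script above builds six deterministic seasonal regressors: a single (K = 1) sine/cosine pair for each of the daily, weekly and yearly periods, evaluated on a phase grid that recycles every period. Selecting column 30 of each embedded window keeps the value aligned with the window's most recent timestep (input_size = 24 * 1.25 = 30), so every window contributes exactly one row of six exogenous features. A minimal NumPy sketch of the equivalent phase computation, for illustration only:

import numpy as np

def fourier_k1_features(n, periods=(24, 168, 8766)):
    # phase at step t is 2*pi*(t mod p)/p -- the same grid as R's
    # seq(0, 2*pi, length = p + 1) with the duplicate endpoint dropped
    t = np.arange(n)
    feats = []
    for p in periods:
        phase = 2 * np.pi * (t % p) / p
        feats.append(np.sin(phase))
        feats.append(np.cos(phase))
    return np.column_stack(feats)          # shape (n, 6)

# e.g. the six features at the last timestep of a window ending at t = 1000
print(fourier_k1_features(1001)[-1])

In the SE (seasonal-exogenous) variants these values are fed to the network alongside the input windows, whereas the DS scripts subtract the decomposed seasonality from the series before windowing.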
-------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Fourier-SE-1/energy_fourierk1_train_validation.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | OUTPUT_DIR = "Mean_Moving_window" 7 | input_size = 24*1.25 8 | max_forecast_horizon <- 24 9 | seasonality_period_1 <- 24 10 | seasonality_period_2 <- 168 11 | seasonality_period_3 <- 8766 12 | 13 | start_time <- Sys.time() 14 | 15 | for (validation in c(TRUE, FALSE)) { 16 | for (idr in 1 : nrow(df_train)) { 17 | print(idr) 18 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_fourierk1", sep = '/') 19 | 20 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 21 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 22 | if (validation) { 23 | OUTPUT_PATH = paste(OUTPUT_PATH, 'v', sep = '') 24 | } 25 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 26 | 27 | time_series_data <- as.numeric(df_train[idr,]) 28 | time_series_mean <- mean(time_series_data) 29 | 30 | time_series_data <- time_series_data/(time_series_mean) 31 | 32 | time_series_log <- log(time_series_data + 1) 33 | time_series_length = length(time_series_log) 34 | 35 | if (! validation) { 36 | time_series_length = time_series_length - max_forecast_horizon 37 | time_series_log = time_series_log[1 : time_series_length] 38 | } 39 | 40 | n <- time_series_length 41 | freq1 <- 24 42 | freq2 <- 168 43 | freq3 <- 8766 44 | 45 | xs_1 <- seq(0, 2*pi, length=freq1+1) 46 | xs_2 <- seq(0, 2*pi, length=freq2+1) 47 | xs_3 <- seq(0, 2*pi, length=freq3+1) 48 | 49 | xsrep_1 <- rep(xs_1[-length(xs_1)], length=n) 50 | xsrep_2 <- rep(xs_2[-length(xs_2)], length=n) 51 | xsrep_3 <- rep(xs_3[-length(xs_3)], length=n) 52 | 53 | sin1 <- sin(xsrep_1) 54 | sin2 <- sin(xsrep_2) 55 | sin3 <- sin(xsrep_3) 56 | 57 | cos1 <- cos(xsrep_1) 58 | cos2 <- cos(xsrep_2) 59 | cos3 <- cos(xsrep_3) 60 | 61 | seasonality_sin1 <- sin1 62 | seasonality_sin2 <- sin2 63 | seasonality_sin3 <- sin3 64 | 65 | seasonality_cos1 <- cos1 66 | seasonality_cos2 <- cos2 67 | seasonality_cos3 <- cos3 68 | 69 | input_windows = embed(time_series_log[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 70 | output_windows = embed(time_series_log[-(1:input_size)], max_forecast_horizon)[, max_forecast_horizon : 1] 71 | seasonality_sin1_windows = embed(seasonality_sin1[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 72 | seasonality_sin2_windows = embed(seasonality_sin2[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 73 | seasonality_sin3_windows = embed(seasonality_sin3[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 74 | 75 | seasonality_cos1_windows = embed(seasonality_cos1[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 76 | seasonality_cos2_windows = embed(seasonality_cos2[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 77 | seasonality_cos3_windows = embed(seasonality_cos3[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 78 | 79 | seasonality_sin1_windows = seasonality_sin1_windows[, c(30)] 80 | seasonality_sin2_windows = seasonality_sin2_windows[, c(30)] 81 | seasonality_sin3_windows = seasonality_sin3_windows[, c(30)] 82 | 83 | seasonality_cos1_windows = seasonality_cos1_windows[, c(30)] 84 | 
seasonality_cos2_windows = seasonality_cos2_windows[, c(30)] 85 | seasonality_cos3_windows = seasonality_cos3_windows[, c(30)] 86 | 87 | meanvalues <- rowMeans(input_windows) 88 | input_windows <- input_windows - meanvalues 89 | output_windows <- output_windows -meanvalues 90 | 91 | if (validation) { 92 | sav_df = matrix(NA, ncol = (5 + input_size + 6 + max_forecast_horizon), nrow = nrow(input_windows )) 93 | sav_df = as.data.frame(sav_df) 94 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 95 | sav_df[, 2] = seasonality_sin1_windows 96 | sav_df[, 3] = seasonality_sin2_windows 97 | sav_df[, 4] = seasonality_sin3_windows 98 | sav_df[, 5] = seasonality_cos1_windows 99 | sav_df[, 6] = seasonality_cos2_windows 100 | sav_df[, 7] = seasonality_cos3_windows 101 | sav_df[, 8 : (input_size + 6 + 1)] = input_windows 102 | sav_df[, (input_size + 6 + 2)] = '|o' 103 | sav_df[, (input_size + 6 + 3):(input_size + 6 + max_forecast_horizon +2)] = output_windows 104 | sav_df[, (input_size + 6 + max_forecast_horizon + 3)] = '|#' 105 | sav_df[, (input_size + 6 + max_forecast_horizon + 4)] = time_series_mean 106 | sav_df[, (input_size + 6 + max_forecast_horizon + 5)] = meanvalues 107 | }else { 108 | sav_df = matrix(NA, ncol = (2 + input_size + 6 + max_forecast_horizon), nrow = nrow(input_windows)) 109 | sav_df = as.data.frame(sav_df) 110 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 111 | sav_df[, 2] = seasonality_sin1_windows 112 | sav_df[, 3] = seasonality_sin2_windows 113 | sav_df[, 4] = seasonality_sin3_windows 114 | sav_df[, 5] = seasonality_cos1_windows 115 | sav_df[, 6] = seasonality_cos2_windows 116 | sav_df[, 7] = seasonality_cos3_windows 117 | sav_df[, 8 : (input_size + 6 + 1)] = input_windows 118 | sav_df[, (input_size + 6 + 2)] = '|o' 119 | sav_df[, (input_size + 6 + 3):(input_size + 6 + max_forecast_horizon +2)] = output_windows 120 | } 121 | 122 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 123 | } 124 | } 125 | 126 | end_time <- Sys.time() 127 | 128 | print(paste0("Total time", (end_time - start_time))) -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Fourier-SE/Mean_Moving_window/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Preprocessing-Scripts/SE/LSTM-Fourier-SE/Mean_Moving_window/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Fourier-SE/energy_fourier_test.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | OUTPUT_DIR = "Mean_Moving_window" 7 | input_size = 24*1.25 8 | max_forecast_horizon <- 24 9 | seasonality_period_1 <- 24 10 | seasonality_period_2 <- 168 11 | seasonality_period_3 <- 8766 12 | 13 | start_time <- Sys.time() 14 | 15 | 16 | for (idr in 1 : nrow(df_train)) { 17 | print(idr) 18 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_fourier_test", sep = '/') 19 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 20 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 21 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 22 | 23 | time_series_data <- as.numeric(df_train[idr,]) 24 | time_series_mean <- mean(time_series_data) 25 | 26 | 
time_series_data <- time_series_data/(time_series_mean) 27 | 28 | time_series_log <- log(time_series_data + 1) 29 | time_series_length = length(time_series_log) 30 | 31 | regessor1 <- fourier(msts(time_series_log, seasonal.periods = c(seasonality_period_1)), K = c(10)) 32 | regessor2 <- fourier(msts(time_series_log, seasonal.periods = c(seasonality_period_2)), K = c(20)) 33 | regessor3 <- fourier(msts(time_series_log, seasonal.periods = c(seasonality_period_3)), K = c(20)) 34 | seasonality1 <- rowSums(regessor1) 35 | seasonality2 <- rowSums(regessor2) 36 | seasonality3 <- rowSums(regessor3) 37 | 38 | input_windows = embed(time_series_log[1 : (time_series_length)], input_size)[, input_size : 1] 39 | seasonality_windows_1 = embed(seasonality1[1 : (time_series_length)], input_size)[, input_size : 1] 40 | seasonality_windows_2 = embed(seasonality2[1 : (time_series_length)], input_size)[, input_size : 1] 41 | seasonality_windows_3 = embed(seasonality3[1 : (time_series_length)], input_size)[, input_size : 1] 42 | 43 | seasonality1_windows = seasonality_windows_1[, c(30)] 44 | seasonality2_windows = seasonality_windows_2[, c(30)] 45 | seasonality3_windows = seasonality_windows_3[, c(30)] 46 | 47 | meanvalues <- rowMeans(input_windows) 48 | input_windows <- input_windows - meanvalues 49 | 50 | sav_df = matrix(NA, ncol = (4 + input_size + 3), nrow = nrow(input_windows)) 51 | sav_df = as.data.frame(sav_df) 52 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 53 | sav_df[, 2] = seasonality1_windows 54 | sav_df[, 3] = seasonality2_windows 55 | sav_df[, 4] = seasonality3_windows 56 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 57 | sav_df[, (input_size + 3 + 2)] = '|#' 58 | sav_df[, (input_size + 3 + 3)] = time_series_mean 59 | sav_df[, (input_size + 3 + 4)] = meanvalues 60 | 61 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 62 | } 63 | 64 | 65 | end_time <- Sys.time() 66 | 67 | print(paste0("Total time", (end_time - start_time))) 68 | -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Fourier-SE/energy_fourier_train_validation.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | OUTPUT_DIR = "Mean_Moving_window" 7 | input_size = 24*1.25 8 | max_forecast_horizon <- 24 9 | seasonality_period_1 <- 24 10 | seasonality_period_2 <- 168 11 | seasonality_period_3 <- 8766 12 | 13 | start_time <- Sys.time() 14 | 15 | for (validation in c(TRUE, FALSE)) { 16 | for (idr in 1 : nrow(df_train)) { 17 | print(idr) 18 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_fourier", sep = '/') 19 | 20 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 21 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 22 | if (validation) { 23 | OUTPUT_PATH = paste(OUTPUT_PATH, 'v', sep = '') 24 | } 25 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 26 | 27 | time_series_data <- as.numeric(df_train[idr,]) 28 | time_series_mean <- mean(time_series_data) 29 | 30 | time_series_data <- time_series_data/(time_series_mean) 31 | 32 | time_series_log <- log(time_series_data + 1) 33 | time_series_length = length(time_series_log) 34 | 35 | if (! 
validation) { 36 | time_series_length = time_series_length - max_forecast_horizon 37 | time_series_log = time_series_log[1 : time_series_length] 38 | } 39 | 40 | regessor1 <- fourier(msts(time_series_log, seasonal.periods = c(seasonality_period_1)), K = c(10)) 41 | regessor2 <- fourier(msts(time_series_log, seasonal.periods = c(seasonality_period_2)), K = c(20)) 42 | regessor3 <- fourier(msts(time_series_log, seasonal.periods = c(seasonality_period_3)), K = c(20)) 43 | seasonality1 <- rowSums(regessor1) 44 | seasonality2 <- rowSums(regessor2) 45 | seasonality3 <- rowSums(regessor3) 46 | 47 | input_windows = embed(time_series_log[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 48 | output_windows = embed(time_series_log[-(1:input_size)], max_forecast_horizon)[, max_forecast_horizon : 1] 49 | seasonality1_windows = embed(seasonality1[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 50 | seasonality2_windows = embed(seasonality2[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 51 | seasonality3_windows = embed(seasonality3[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 52 | 53 | seasonality1_windows = seasonality1_windows[, c(30)] 54 | seasonality2_windows = seasonality2_windows[, c(30)] 55 | seasonality3_windows = seasonality3_windows[, c(30)] 56 | 57 | meanvalues <- rowMeans(input_windows) 58 | input_windows <- input_windows - meanvalues 59 | output_windows <- output_windows -meanvalues 60 | 61 | if (validation) { 62 | sav_df = matrix(NA, ncol = (5 + input_size + 3 + max_forecast_horizon), nrow = nrow(input_windows )) 63 | sav_df = as.data.frame(sav_df) 64 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 65 | sav_df[, 2] = seasonality1_windows 66 | sav_df[, 3] = seasonality2_windows 67 | sav_df[, 4] = seasonality3_windows 68 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 69 | sav_df[, (input_size + 3 + 2)] = '|o' 70 | sav_df[, (input_size + 3 + 3):(input_size + 3 + max_forecast_horizon +2)] = output_windows 71 | sav_df[, (input_size + 3 + max_forecast_horizon + 3)] = '|#' 72 | sav_df[, (input_size + 3 + max_forecast_horizon + 4)] = time_series_mean 73 | sav_df[, (input_size + 3 + max_forecast_horizon + 5)] = meanvalues 74 | }else { 75 | sav_df = matrix(NA, ncol = (2 + input_size + 3 + max_forecast_horizon), nrow = nrow(input_windows)) 76 | sav_df = as.data.frame(sav_df) 77 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 78 | sav_df[, 2] = seasonality1_windows 79 | sav_df[, 3] = seasonality2_windows 80 | sav_df[, 4] = seasonality3_windows 81 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 82 | sav_df[, (input_size + 3 + 2)] = '|o' 83 | sav_df[, (input_size + 3 + 3):(input_size + 3 + max_forecast_horizon +2)] = output_windows 84 | } 85 | 86 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 87 | } 88 | } 89 | 90 | end_time <- Sys.time() 91 | 92 | print(paste0("Total time", (end_time - start_time))) -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-MSTL-SE/Mean_Moving_window/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Preprocessing-Scripts/SE/LSTM-MSTL-SE/Mean_Moving_window/empty_commit.txt -------------------------------------------------------------------------------- 
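The MSTL scripts that follow index the decomposition matrix by position: column 2 is the trend, columns 3-5 are the seasonal components at periods 24, 168 and 8766, and column 6 is the remainder, so values_vect = trend + remainder is the deseasonalised series. A minimal sketch of that column layout (illustrative only; the simulated two-period series is not from the repository, and the column names shown are what forecast::mstl is expected to produce):

library(forecast)
# six weeks of hourly data with a daily cycle plus noise
x <- msts(sin(2 * pi * (1:1008) / 24) + rnorm(1008), seasonal.periods = c(24, 168))
dec <- mstl(x, s.window = "period")
colnames(dec)  # expected: "Data" "Trend" "Seasonal24" "Seasonal168" "Remainder"
deseasonalised <- dec[, "Trend"] + dec[, "Remainder"]  # analogue of values_vect above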
/src/LSTM-Preprocessing-Scripts/SE/LSTM-MSTL-SE/energy_mstl_test.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | 3 | df_train <- read.csv("solar_train.txt", header = FALSE) 4 | 5 | OUTPUT_DIR = "Mean_Moving_window" 6 | input_size = 24*1.25 7 | max_forecast_horizon <- 24 8 | seasonality_period_1 <- 24 9 | seasonality_period_2 <- 168 10 | seasonality_period_3 <- 8766 11 | 12 | start_time <- Sys.time() 13 | 14 | for (idr in 1 : nrow(df_train)) { 15 | print(idr) 16 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_mstl_test", sep = '/') 17 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 18 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 19 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 20 | 21 | time_series_data <- as.numeric(df_train[idr,]) 22 | time_series_mean <- mean(time_series_data) 23 | 24 | time_series_data <- time_series_data/(time_series_mean) 25 | 26 | time_series_log <- log(time_series_data + 1) 27 | time_series_length = length(time_series_log) 28 | 29 | stl_result = tryCatch({ 30 | sstl = mstl(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2, seasonality_period_3)), s.window = "period") 31 | seasonal_vect1 = as.numeric(sstl[, 3]) 32 | seasonal_vect2 = as.numeric(sstl[, 4]) 33 | seasonal_vect3 = as.numeric(sstl[, 5]) 34 | levels_vect = as.numeric(sstl[, 2]) 35 | values_vect = as.numeric(sstl[, 2] + sstl[, 6]) 36 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 37 | },error = function(e) { 38 | seasonal_vect1 = rep(0, length(time_series_length)) 39 | seasonal_vect2 = rep(0, length(time_series_length)) 40 | seasonal_vect3 = rep(0, length(time_series_length)) 41 | levels_vect = time_series_log 42 | values_vect = time_series_log 43 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 44 | }) 45 | 46 | 47 | input_windows = embed(time_series_log[1 : (time_series_length)], input_size)[, input_size : 1] 48 | seasonality_windows_1 = embed(stl_result[1 : (time_series_length), 1], input_size)[, input_size : 1] 49 | seasonality_windows_2 = embed(stl_result[1 : (time_series_length), 2], input_size)[, input_size : 1] 50 | seasonality_windows_3 = embed(stl_result[1 : (time_series_length), 3], input_size)[, input_size : 1] 51 | 52 | seasonality1_windows = seasonality_windows_1[, c(30)] 53 | seasonality2_windows = seasonality_windows_2[, c(30)] 54 | seasonality3_windows = seasonality_windows_3[, c(30)] 55 | 56 | meanvalues <- rowMeans(input_windows) 57 | input_windows <- input_windows - meanvalues 58 | 59 | sav_df = matrix(NA, ncol = (4 + input_size + 3), nrow = nrow(input_windows)) 60 | sav_df = as.data.frame(sav_df) 61 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 62 | sav_df[, 2] = seasonality1_windows 63 | sav_df[, 3] = seasonality2_windows 64 | sav_df[, 4] = seasonality3_windows 65 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 66 | sav_df[, (input_size + 3 + 2)] = '|#' 67 | sav_df[, (input_size + 3 + 3)] = time_series_mean 68 | sav_df[, (input_size + 3 + 4)] = meanvalues 69 | 70 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 71 | } 72 | 73 | end_time <- Sys.time() 74 | print(paste0("Total time", (end_time - start_time))) 75 | -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-MSTL-SE/energy_mstl_train_validation.R: 
-------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | OUTPUT_DIR = "Mean_Moving_window" 7 | input_size = 24*1.25 8 | max_forecast_horizon <- 24 9 | seasonality_period_1 <- 24 10 | seasonality_period_2 <- 168 11 | seasonality_period_3 <- 8766 12 | 13 | start_time <- Sys.time() 14 | 15 | for (validation in c(TRUE, FALSE)) { 16 | for (idr in 1 : nrow(df_train)) { 17 | print(idr) 18 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_mstl", sep = '/') 19 | 20 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 21 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 22 | if (validation) { 23 | OUTPUT_PATH = paste(OUTPUT_PATH, 'v', sep = '') 24 | } 25 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 26 | 27 | time_series_data <- as.numeric(df_train[idr,]) 28 | time_series_mean <- mean(time_series_data) 29 | 30 | time_series_data <- time_series_data/(time_series_mean) 31 | 32 | time_series_log <- log(time_series_data + 1) 33 | time_series_length = length(time_series_log) 34 | 35 | if (! validation) { 36 | time_series_length = time_series_length - max_forecast_horizon 37 | time_series_log = time_series_log[1 : time_series_length] 38 | } 39 | # apply stl 40 | stl_result = tryCatch({ 41 | sstl = mstl(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2,seasonality_period_3)), s.window = "period") 42 | seasonal_vect1 = as.numeric(sstl[, 3]) 43 | seasonal_vect2 = as.numeric(sstl[, 4]) 44 | seasonal_vect3 = as.numeric(sstl[, 5]) 45 | levels_vect = as.numeric(sstl[, 2]) 46 | values_vect = as.numeric(sstl[, 2] + sstl[, 6])# this is what we are going to work on: sum of the smooth trend and the random component (the seasonality removed) 47 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 48 | }, error = function(e) { 49 | seasonal_vect1 = rep(0, length(time_series_length)) 50 | seasonal_vect2 = rep(0, length(time_series_length)) 51 | seasonal_vect3 = rep(0, length(time_series_length)) 52 | levels_vect = time_series_log 53 | values_vect = time_series_log 54 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 55 | }) 56 | 57 | 58 | input_windows = embed(time_series_log[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 59 | output_windows = embed(time_series_log[-(1:input_size)], max_forecast_horizon)[, max_forecast_horizon : 1] 60 | seasonality1_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 1], input_size)[, input_size : 1] 61 | seasonality2_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 2], input_size)[, input_size : 1] 62 | seasonality3_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 3], input_size)[, input_size : 1] 63 | 64 | seasonality1_windows = seasonality1_windows[, c(30)] 65 | seasonality2_windows = seasonality2_windows[, c(30)] 66 | seasonality3_windows = seasonality3_windows[, c(30)] 67 | 68 | meanvalues <- rowMeans(input_windows) 69 | input_windows <- input_windows - meanvalues 70 | output_windows <- output_windows -meanvalues 71 | 72 | if (validation) { 73 | sav_df = matrix(NA, ncol = (5 + input_size + 3 + max_forecast_horizon), nrow = nrow(input_windows )) 74 | sav_df = as.data.frame(sav_df) 75 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 76 | sav_df[, 2] = seasonality1_windows 77 | sav_df[, 3] = seasonality2_windows 78 | 
sav_df[, 4] = seasonality3_windows 79 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 80 | sav_df[, (input_size + 3 + 2)] = '|o' 81 | sav_df[, (input_size + 3 + 3):(input_size + 3 + max_forecast_horizon +2)] = output_windows 82 | sav_df[, (input_size + 3 + max_forecast_horizon + 3)] = '|#' 83 | sav_df[, (input_size + 3 + max_forecast_horizon + 4)] = time_series_mean 84 | sav_df[, (input_size + 3 + max_forecast_horizon + 5)] = meanvalues 85 | }else { 86 | sav_df = matrix(NA, ncol = (2 + input_size + 3 + max_forecast_horizon), nrow = nrow(input_windows)) 87 | sav_df = as.data.frame(sav_df) 88 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 89 | sav_df[, 2] = seasonality1_windows 90 | sav_df[, 3] = seasonality2_windows 91 | sav_df[, 4] = seasonality3_windows 92 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 93 | sav_df[, (input_size + 3 + 2)] = '|o' 94 | sav_df[, (input_size + 3 + 3):(input_size + 3 + max_forecast_horizon +2)] = output_windows 95 | } 96 | 97 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 98 | } 99 | } 100 | 101 | end_time <- Sys.time() 102 | 103 | print(paste0("Total time", (end_time - start_time))) -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-MSTL7-SE/Mean_Moving_window/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Preprocessing-Scripts/SE/LSTM-MSTL7-SE/Mean_Moving_window/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-MSTL7-SE/energy_mstl7_test.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | OUTPUT_DIR = "Mean_Moving_window" 7 | input_size = 24*1.25 8 | max_forecast_horizon <- 24 9 | seasonality_period_1 <- 24 10 | seasonality_period_2 <- 168 11 | seasonality_period_3 <- 8766 12 | 13 | start_time <- Sys.time() 14 | 15 | for (idr in 1 : nrow(df_train)) { 16 | print(idr) 17 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_mstl7_test", sep = '/') 18 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 19 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 20 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 21 | 22 | time_series_data <- as.numeric(df_train[idr,]) 23 | time_series_mean <- mean(time_series_data) 24 | 25 | time_series_data <- time_series_data/(time_series_mean) 26 | 27 | time_series_log <- log(time_series_data + 1) 28 | time_series_length = length(time_series_log) 29 | 30 | stl_result = tryCatch({ 31 | sstl = mstl(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2, seasonality_period_3)), s.window = 7) 32 | seasonal_vect1 = as.numeric(sstl[, 3]) 33 | seasonal_vect2 = as.numeric(sstl[, 4]) 34 | seasonal_vect3 = as.numeric(sstl[, 5]) 35 | levels_vect = as.numeric(sstl[, 2]) 36 | values_vect = as.numeric(sstl[, 2] + sstl[, 6])# this is what we are going to work on: sum of the smooth trend and the random component (the seasonality removed) 37 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 38 | },error = function(e) { 39 | seasonal_vect1 = rep(0, length(time_series_length)) 40 | seasonal_vect2 = rep(0, length(time_series_length)) 41 | 
seasonal_vect3 = rep(0, length(time_series_length)) 42 | levels_vect = time_series_log 43 | values_vect = time_series_log 44 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 45 | }) 46 | 47 | 48 | input_windows = embed(time_series_log[1 : (time_series_length)], input_size)[, input_size : 1] 49 | seasonality_windows_1 = embed(stl_result[1 : (time_series_length), 1], input_size)[, input_size : 1] 50 | seasonality_windows_2 = embed(stl_result[1 : (time_series_length), 2], input_size)[, input_size : 1] 51 | seasonality_windows_3 = embed(stl_result[1 : (time_series_length), 3], input_size)[, input_size : 1] 52 | 53 | seasonality1_windows = seasonality_windows_1[, c(30)] 54 | seasonality2_windows = seasonality_windows_2[, c(30)] 55 | seasonality3_windows = seasonality_windows_3[, c(30)] 56 | 57 | meanvalues <- rowMeans(input_windows) 58 | input_windows <- input_windows - meanvalues 59 | 60 | sav_df = matrix(NA, ncol = (4 + input_size + 3), nrow = nrow(input_windows)) 61 | sav_df = as.data.frame(sav_df) 62 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 63 | sav_df[, 2] = seasonality1_windows 64 | sav_df[, 3] = seasonality2_windows 65 | sav_df[, 4] = seasonality3_windows 66 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 67 | sav_df[, (input_size + 3 + 2)] = '|#' 68 | sav_df[, (input_size + 3 + 3)] = time_series_mean 69 | sav_df[, (input_size + 3 + 4)] = meanvalues 70 | 71 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 72 | } 73 | 74 | end_time <- Sys.time() 75 | 76 | print(paste0("Total time", (end_time - start_time))) 77 | -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-MSTL7-SE/energy_mstl7_train_validation.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | OUTPUT_DIR = "Mean_Moving_window" 7 | input_size = 24*1.25 8 | max_forecast_horizon <- 24 9 | seasonality_period_1 <- 24 10 | seasonality_period_2 <- 168 11 | seasonality_period_3 <- 8766 12 | 13 | start_time <- Sys.time() 14 | 15 | for (validation in c(TRUE, FALSE)) { 16 | for (idr in 1 : nrow(df_train)) { 17 | print(idr) 18 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_mstl7", sep = '/') 19 | 20 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 21 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 22 | if (validation) { 23 | OUTPUT_PATH = paste(OUTPUT_PATH, 'v', sep = '') 24 | } 25 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 26 | 27 | time_series_data <- as.numeric(df_train[idr,]) 28 | time_series_mean <- mean(time_series_data) 29 | 30 | time_series_data <- time_series_data/(time_series_mean) 31 | 32 | time_series_log <- log(time_series_data + 1) 33 | time_series_length = length(time_series_log) 34 | 35 | if (! 
validation) { 36 | time_series_length = time_series_length - max_forecast_horizon 37 | time_series_log = time_series_log[1 : time_series_length] 38 | } 39 | # apply stl 40 | stl_result = tryCatch({ 41 | sstl = mstl(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2,seasonality_period_3)), s.window = 7) 42 | seasonal_vect1 = as.numeric(sstl[, 3]) 43 | seasonal_vect2 = as.numeric(sstl[, 4]) 44 | seasonal_vect3 = as.numeric(sstl[, 5]) 45 | levels_vect = as.numeric(sstl[, 2]) 46 | values_vect = as.numeric(sstl[, 2] + sstl[, 6])# this is what we are going to work on: sum of the smooth trend and the random component (the seasonality removed) 47 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 48 | }, error = function(e) { 49 | seasonal_vect1 = rep(0, length(time_series_length)) 50 | seasonal_vect2 = rep(0, length(time_series_length)) 51 | seasonal_vect3 = rep(0, length(time_series_length)) 52 | levels_vect = time_series_log 53 | values_vect = time_series_log 54 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 55 | }) 56 | 57 | 58 | input_windows = embed(time_series_log[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 59 | output_windows = embed(time_series_log[-(1:input_size)], max_forecast_horizon)[, max_forecast_horizon : 1] 60 | seasonality1_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 1], input_size)[, input_size : 1] 61 | seasonality2_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 2], input_size)[, input_size : 1] 62 | seasonality3_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 3], input_size)[, input_size : 1] 63 | 64 | seasonality1_windows = seasonality1_windows[, c(30)] 65 | seasonality2_windows = seasonality2_windows[, c(30)] 66 | seasonality3_windows = seasonality3_windows[, c(30)] 67 | 68 | meanvalues <- rowMeans(input_windows) 69 | input_windows <- input_windows - meanvalues 70 | output_windows <- output_windows -meanvalues 71 | 72 | if (validation) { 73 | sav_df = matrix(NA, ncol = (5 + input_size + 3 + max_forecast_horizon), nrow = nrow(input_windows )) 74 | sav_df = as.data.frame(sav_df) 75 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 76 | sav_df[, 2] = seasonality1_windows 77 | sav_df[, 3] = seasonality2_windows 78 | sav_df[, 4] = seasonality3_windows 79 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 80 | sav_df[, (input_size + 3 + 2)] = '|o' 81 | sav_df[, (input_size + 3 + 3):(input_size + 3 + max_forecast_horizon +2)] = output_windows 82 | sav_df[, (input_size + 3 + max_forecast_horizon + 3)] = '|#' 83 | sav_df[, (input_size + 3 + max_forecast_horizon + 4)] = time_series_mean 84 | sav_df[, (input_size + 3 + max_forecast_horizon + 5)] = meanvalues 85 | }else { 86 | sav_df = matrix(NA, ncol = (2 + input_size + 3 + max_forecast_horizon), nrow = nrow(input_windows)) 87 | sav_df = as.data.frame(sav_df) 88 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 89 | sav_df[, 2] = seasonality1_windows 90 | sav_df[, 3] = seasonality2_windows 91 | sav_df[, 4] = seasonality3_windows 92 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 93 | sav_df[, (input_size + 3 + 2)] = '|o' 94 | sav_df[, (input_size + 3 + 3):(input_size + 3 + max_forecast_horizon +2)] = output_windows 95 | } 96 | 97 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 98 | } 99 | } 100 | 101 | end_time <- Sys.time() 102 | 103 | 
print(paste0("Total time", (end_time - start_time))) -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Prophet-SE/Mean_Moving_window/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Preprocessing-Scripts/SE/LSTM-Prophet-SE/Mean_Moving_window/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Prophet-SE/energy_prophet_test.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | library(xts) 3 | library(prophet) 4 | set.seed(1234) 5 | 6 | df_train <- read.csv("solar_train.txt", header = FALSE) 7 | 8 | OUTPUT_DIR = "Mean_Moving_window" 9 | input_size = 24*1.25 10 | max_forecast_horizon <- 24 11 | seasonality_period_1 <- 24 12 | seasonality_period_2 <- 168 13 | seasonality_period_3 <- 8766 14 | 15 | start_time <- Sys.time() 16 | 17 | for (idr in 1 : nrow(df_train)) { 18 | print(idr) 19 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_prophet_test", sep = '/') 20 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 21 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 22 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 23 | 24 | time_series_data <- as.numeric(df_train[idr,]) 25 | time_series_mean <- mean(time_series_data) 26 | 27 | time_series_data <- time_series_data/(time_series_mean) 28 | 29 | time_series_log <- log(time_series_data + 1) 30 | time_series_length = length(time_series_log) 31 | 32 | ts <- seq(from = as.POSIXct("2010-01-01 00:00"), length.out = time_series_length, by = "hour") 33 | history <- data.frame(ds = ts, y = time_series_log) 34 | 35 | # apply stl 36 | stl_result = tryCatch({ 37 | sstl = prophet(history, daily.seasonality = TRUE, weekly.seasonality = TRUE, yearly.seasonality = TRUE) 38 | sstl_comp = predict(sstl) 39 | seasonal_vect1 = as.numeric(sstl_comp$daily) 40 | seasonal_vect2 = as.numeric(sstl_comp$weekly) 41 | seasonal_vect3 = as.numeric(sstl_comp$yearly) 42 | levels_vect = sstl_comp$trend 43 | residuals = (time_series_log - sstl_comp$yhat) 44 | values_vect = residuals + (levels_vect) 45 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 46 | }, error = function(e) { # fall back to zero seasonal components if Prophet fails 47 | seasonal_vect1 = rep(0, time_series_length) 48 | seasonal_vect2 = rep(0, time_series_length); seasonal_vect3 = rep(0, time_series_length) 49 | levels_vect = time_series_log 50 | values_vect = time_series_log 51 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 52 | }) 53 | 54 | input_windows = embed(time_series_log[1 : (time_series_length)], input_size)[, input_size : 1] 55 | seasonality_windows_1 = embed(stl_result[1 : (time_series_length), 1], input_size)[, input_size : 1] 56 | seasonality_windows_2 = embed(stl_result[1 : (time_series_length), 2], input_size)[, input_size : 1] 57 | seasonality_windows_3 = embed(stl_result[1 : (time_series_length), 3], input_size)[, input_size : 1] 58 | 59 | seasonality1_windows = seasonality_windows_1[, c(30)] 60 | seasonality2_windows = seasonality_windows_2[, c(30)] 61 | seasonality3_windows = seasonality_windows_3[, c(30)] 62 | 63 | meanvalues <- rowMeans(input_windows) 64 | input_windows <- input_windows - meanvalues 65 | 66 | sav_df = matrix(NA, ncol = (4 + input_size + 3), nrow = nrow(input_windows)) 67 | sav_df = as.data.frame(sav_df) 68 |
sav_df[, 1] = paste(idr - 1, '|i', sep = '') 69 | sav_df[, 2] = seasonality1_windows 70 | sav_df[, 3] = seasonality2_windows 71 | sav_df[, 4] = seasonality3_windows 72 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 73 | sav_df[, (input_size + 3 + 2)] = '|#' 74 | sav_df[, (input_size + 3 + 3)] = time_series_mean 75 | sav_df[, (input_size + 3 + 4)] = meanvalues 76 | 77 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 78 | } 79 | 80 | end_time <- Sys.time() 81 | 82 | print(paste0("Total time", (end_time - start_time))) 83 | -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Prophet-SE/energy_prophet_train_validation.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | library(xts) 3 | library(prophet) 4 | set.seed(1234) 5 | 6 | df_train <- read.csv("solar_train.txt", header = FALSE) 7 | 8 | OUTPUT_DIR = "Mean_Moving_window" 9 | input_size = 24*1.25 10 | max_forecast_horizon <- 24 11 | seasonality_period_1 <- 24 12 | seasonality_period_2 <- 168 13 | seasonality_period_3 <- 8766 14 | 15 | start_time <- Sys.time() 16 | 17 | for (validation in c(TRUE, FALSE)) { 18 | for (idr in 1 : nrow(df_train)) { 19 | print(idr) 20 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_prophet", sep = '/') 21 | 22 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 23 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 24 | if (validation) { 25 | OUTPUT_PATH = paste(OUTPUT_PATH, 'v', sep = '') 26 | } 27 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 28 | 29 | time_series_data <- as.numeric(df_train[idr,]) 30 | time_series_mean <- mean(time_series_data) 31 | 32 | time_series_data <- time_series_data/(time_series_mean) 33 | 34 | time_series_log <- log(time_series_data + 1) 35 | time_series_length = length(time_series_log) 36 | 37 | if (! 
validation) { 38 | time_series_length = time_series_length - max_forecast_horizon 39 | time_series_log = time_series_log[1 : time_series_length] 40 | } 41 | 42 | ts <- seq(from = as.POSIXct("2010-01-01 00:00"), length.out = time_series_length, by = "hour") 43 | history <- data.frame(ds = ts, y = time_series_log) 44 | 45 | # apply stl 46 | stl_result = tryCatch({ 47 | sstl = prophet(history, daily.seasonality = TRUE, weekly.seasonality = TRUE, yearly.seasonality = TRUE) 48 | sstl_comp = predict(sstl) 49 | seasonal_vect1 = as.numeric(sstl_comp$daily) 50 | seasonal_vect2 = as.numeric(sstl_comp$weekly) 51 | seasonal_vect3 = as.numeric(sstl_comp$yearly) 52 | levels_vect = sstl_comp$trend 53 | residuals = (time_series_log - sstl_comp$yhat) 54 | values_vect = residuals + (levels_vect) 55 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 56 | }, error = function(e) { # fall back to zero seasonal components if Prophet fails 57 | seasonal_vect1 = rep(0, time_series_length) 58 | seasonal_vect2 = rep(0, time_series_length); seasonal_vect3 = rep(0, time_series_length) 59 | levels_vect = time_series_log 60 | values_vect = time_series_log 61 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 62 | }) 63 | 64 | 65 | input_windows = embed(time_series_log[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 66 | output_windows = embed(time_series_log[-(1:input_size)], max_forecast_horizon)[, max_forecast_horizon : 1] 67 | seasonality1_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 1], input_size)[, input_size : 1] 68 | seasonality2_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 2], input_size)[, input_size : 1] 69 | seasonality3_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 3], input_size)[, input_size : 1] 70 | 71 | seasonality1_windows = seasonality1_windows[, c(30)] 72 | seasonality2_windows = seasonality2_windows[, c(30)] 73 | seasonality3_windows = seasonality3_windows[, c(30)] 74 | 75 | meanvalues <- rowMeans(input_windows) 76 | input_windows <- input_windows - meanvalues 77 | output_windows <- output_windows - meanvalues 78 | 79 | if (validation) { 80 | # create the seasonality metadata 81 | sav_df = matrix(NA, ncol = (5 + input_size + 3 + max_forecast_horizon), nrow = nrow(input_windows )) 82 | sav_df = as.data.frame(sav_df) 83 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 84 | sav_df[, 2] = seasonality1_windows 85 | sav_df[, 3] = seasonality2_windows 86 | sav_df[, 4] = seasonality3_windows 87 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 88 | sav_df[, (input_size + 3 + 2)] = '|o' 89 | sav_df[, (input_size + 3 + 3):(input_size + 3 + max_forecast_horizon +2)] = output_windows 90 | sav_df[, (input_size + 3 + max_forecast_horizon + 3)] = '|#' 91 | sav_df[, (input_size + 3 + max_forecast_horizon + 4)] = time_series_mean 92 | sav_df[, (input_size + 3 + max_forecast_horizon + 5)] = meanvalues 93 | }else { 94 | sav_df = matrix(NA, ncol = (2 + input_size + 3 + max_forecast_horizon), nrow = nrow(input_windows)) 95 | sav_df = as.data.frame(sav_df) 96 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 97 | sav_df[, 2] = seasonality1_windows 98 | sav_df[, 3] = seasonality2_windows 99 | sav_df[, 4] = seasonality3_windows 100 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 101 | sav_df[, (input_size + 3 + 2)] = '|o' 102 | sav_df[, (input_size + 3 + 3):(input_size + 3 + max_forecast_horizon +2)] = output_windows 103 | } 104 | 105 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote
= F, append = TRUE) 106 | } 107 | } 108 | 109 | end_time <- Sys.time() 110 | 111 | print(paste0("Total time", (end_time - start_time))) -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Tbats-SE/Mean_Moving_window/empty_commit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kasungayan/LSTMMSNet/664db971ff48edcbbc9aed478ac7ca84c8b1da12/src/LSTM-Preprocessing-Scripts/SE/LSTM-Tbats-SE/Mean_Moving_window/empty_commit.txt -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Tbats-SE/energy_tbats_test.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | 7 | OUTPUT_DIR = "Mean_Moving_window" 8 | input_size = 24*1.25 9 | max_forecast_horizon <- 24 10 | seasonality_period_1 <- 24 11 | seasonality_period_2 <- 168 12 | seasonality_period_3 <- 8766 13 | 14 | start_time <- Sys.time() 15 | 16 | for (idr in 1 : nrow(df_train)) { 17 | print(idr) 18 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_tbats_test", sep = '/') 19 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 20 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 21 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 22 | 23 | time_series_data <- as.numeric(df_train[idr,]) 24 | time_series_mean <- mean(time_series_data) 25 | 26 | time_series_data <- time_series_data/(time_series_mean) 27 | 28 | time_series_log <- log(time_series_data + 1) 29 | time_series_length = length(time_series_log) 30 | 31 | stl_result = tryCatch({ 32 | sstl = tbats(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2, seasonality_period_3))) 33 | sstl_comp = tbats.components(sstl) 34 | seasonal_vect_1 = as.numeric(sstl_comp[, 'season1']) 35 | seasonal_vect_2 = as.numeric(sstl_comp[, 'season2']) 36 | seasonal_vect_3 = as.numeric(sstl_comp[, 'season3']) 37 | levels_vect = as.numeric(sstl_comp[, 'level']) 38 | values_vect = sstl$errors + levels_vect 39 | cbind(seasonal_vect_1,seasonal_vect_2,seasonal_vect_3,levels_vect, values_vect) 40 | }, error = function(e) { # fall back to zero seasonal components if TBATS fails 41 | seasonal_vect_1 = rep(0, time_series_length) 42 | seasonal_vect_2 = rep(0, time_series_length); seasonal_vect_3 = rep(0, time_series_length) 43 | levels_vect = time_series_log 44 | values_vect = time_series_log 45 | cbind(seasonal_vect_1, seasonal_vect_2, seasonal_vect_3, levels_vect, values_vect) 46 | }) 47 | 48 | 49 | input_windows = embed(time_series_log[1 : (time_series_length)], input_size)[, input_size : 1] 50 | seasonality_windows_1 = embed(stl_result[1 : (time_series_length), 1], input_size)[, input_size : 1] 51 | seasonality_windows_2 = embed(stl_result[1 : (time_series_length), 2], input_size)[, input_size : 1] 52 | seasonality_windows_3 = embed(stl_result[1 : (time_series_length), 3], input_size)[, input_size : 1] 53 | 54 | seasonality1_windows = seasonality_windows_1[, c(30)] 55 | seasonality2_windows = seasonality_windows_2[, c(30)] 56 | seasonality3_windows = seasonality_windows_3[, c(30)] 57 | 58 | meanvalues <- rowMeans(input_windows) 59 | input_windows <- input_windows - meanvalues 60 | 61 | sav_df = matrix(NA, ncol = (4 + input_size + 3), nrow = nrow(input_windows)) 62 | sav_df = as.data.frame(sav_df) 63 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 64 | sav_df[, 2] = seasonality1_windows 65 | sav_df[, 3] =
seasonality2_windows 66 | sav_df[, 4] = seasonality3_windows 67 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 68 | sav_df[, (input_size + 3 + 2)] = '|#' 69 | sav_df[, (input_size + 3 + 3)] = time_series_mean 70 | sav_df[, (input_size + 3 + 4)] = meanvalues 71 | 72 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 73 | } 74 | 75 | end_time <- Sys.time() 76 | 77 | print(paste0("Total time", (end_time - start_time))) -------------------------------------------------------------------------------- /src/LSTM-Preprocessing-Scripts/SE/LSTM-Tbats-SE/energy_tbats_train_validation.R: -------------------------------------------------------------------------------- 1 | library(forecast) 2 | set.seed(1234) 3 | 4 | df_train <- read.csv("solar_train.txt", header = FALSE) 5 | 6 | 7 | OUTPUT_DIR = "Mean_Moving_window" 8 | input_size = 24*1.25 9 | max_forecast_horizon <- 24 10 | seasonality_period_1 <- 24 11 | seasonality_period_2 <- 168 12 | seasonality_period_3 <- 8766 13 | 14 | start_time <- Sys.time() 15 | 16 | for (validation in c(TRUE, FALSE)) { 17 | for (idr in 1 : nrow(df_train)) { 18 | print(idr) 19 | OUTPUT_PATH = paste(OUTPUT_DIR, "energy_tbats", sep = '/') 20 | 21 | OUTPUT_PATH = paste(OUTPUT_PATH, max_forecast_horizon, sep = '') 22 | OUTPUT_PATH = paste(OUTPUT_PATH, 'i', input_size, sep = '') 23 | if (validation) { 24 | OUTPUT_PATH = paste(OUTPUT_PATH, 'v', sep = '') 25 | } 26 | OUTPUT_PATH = paste(OUTPUT_PATH, 'txt', sep = '.') 27 | 28 | time_series_data <- as.numeric(df_train[idr,]) 29 | time_series_mean <- mean(time_series_data) 30 | 31 | time_series_data <- time_series_data/(time_series_mean) 32 | 33 | time_series_log <- log(time_series_data + 1) 34 | time_series_length = length(time_series_log) 35 | 36 | if (! 
validation) { 37 | time_series_length = time_series_length - max_forecast_horizon 38 | time_series_log = time_series_log[1 : time_series_length] 39 | } 40 | 41 | # apply stl 42 | stl_result = tryCatch({ 43 | sstl = tbats(msts(time_series_log, seasonal.periods = c(seasonality_period_1,seasonality_period_2, seasonality_period_3))) 44 | sstl_comp = tbats.components(sstl) 45 | seasonal_vect1 = as.numeric(sstl_comp[, 'season1']) 46 | seasonal_vect2 = as.numeric(sstl_comp[, 'season2']) 47 | seasonal_vect3 = as.numeric(sstl_comp[, 'season3']) 48 | levels_vect = as.numeric(sstl_comp[, 'level']) 49 | values_vect = sstl$errors + levels_vect 50 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 51 | }, error = function(e) { # fall back to zero seasonal components if TBATS fails 52 | seasonal_vect1 = rep(0, time_series_length) 53 | seasonal_vect2 = rep(0, time_series_length) 54 | seasonal_vect3 = rep(0, time_series_length) 55 | levels_vect = time_series_log 56 | values_vect = time_series_log 57 | cbind(seasonal_vect1, seasonal_vect2, seasonal_vect3, levels_vect, values_vect) 58 | }) 59 | 60 | input_windows = embed(time_series_log[1 : (time_series_length - max_forecast_horizon)], input_size)[, input_size : 1] 61 | output_windows = embed(time_series_log[-(1:input_size)], max_forecast_horizon)[, max_forecast_horizon : 1] 62 | seasonality1_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 1], input_size)[, input_size : 1] 63 | seasonality2_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 2], input_size)[, input_size : 1] 64 | seasonality3_windows = embed(stl_result[1 : (time_series_length - max_forecast_horizon), 3], input_size)[, input_size : 1] 65 | 66 | seasonality1_windows = seasonality1_windows[, c(30)] 67 | seasonality2_windows = seasonality2_windows[, c(30)] 68 | seasonality3_windows = seasonality3_windows[, c(30)] 69 | 70 | meanvalues <- rowMeans(input_windows) 71 | input_windows <- input_windows - meanvalues 72 | output_windows <- output_windows - meanvalues 73 | 74 | if (validation) { 75 | # create the seasonality metadata 76 | sav_df = matrix(NA, ncol = (5 + input_size + 3 + max_forecast_horizon), nrow = nrow(input_windows )) 77 | sav_df = as.data.frame(sav_df) 78 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 79 | sav_df[, 2] = seasonality1_windows 80 | sav_df[, 3] = seasonality2_windows 81 | sav_df[, 4] = seasonality3_windows 82 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 83 | sav_df[, (input_size + 3 + 2)] = '|o' 84 | sav_df[, (input_size + 3 + 3):(input_size + 3 + max_forecast_horizon +2)] = output_windows 85 | sav_df[, (input_size + 3 + max_forecast_horizon + 3)] = '|#' 86 | sav_df[, (input_size + 3 + max_forecast_horizon + 4)] = time_series_mean 87 | sav_df[, (input_size + 3 + max_forecast_horizon + 5)] = meanvalues 88 | }else { 89 | sav_df = matrix(NA, ncol = (2 + input_size + 3 + max_forecast_horizon), nrow = nrow(input_windows)) 90 | sav_df = as.data.frame(sav_df) 91 | sav_df[, 1] = paste(idr - 1, '|i', sep = '') 92 | sav_df[, 2] = seasonality1_windows 93 | sav_df[, 3] = seasonality2_windows 94 | sav_df[, 4] = seasonality3_windows 95 | sav_df[, 5 : (input_size + 3 + 1)] = input_windows 96 | sav_df[, (input_size + 3 + 2)] = '|o' 97 | sav_df[, (input_size + 3 + 3):(input_size + 3 + max_forecast_horizon +2)] = output_windows 98 | } 99 | 100 | write.table(sav_df, file = OUTPUT_PATH, row.names = F, col.names = F, sep = " ", quote = F, append = TRUE) 101 | } 102 | } 103 | 104 | end_time <- Sys.time() 105 | 106 | print(paste0("Total
time", (end_time - start_time))) 107 | 108 | #forecast_24 = stlf(ts(sstl[, 3] , frequency = 24), "period", h = 48) --------------------------------------------------------------------------------