├── .gitignore ├── requirements.txt ├── src ├── models │ ├── models_classes.py │ ├── model_callbacks.py │ ├── lobcast_model.py │ ├── utils_models.py │ ├── mlp │ │ └── mlp.py │ ├── binctabl │ │ ├── binctabl.py │ │ └── base.py │ ├── cnn1 │ │ └── cnn1.py │ └── cnn2 │ │ └── cnn2.py ├── batch_experiments │ └── setup01.py ├── run_batch.py ├── run.py ├── metrics │ ├── metrics_log.py │ ├── metrics_learning.py │ └── report.py ├── data_preprocessing │ ├── dataModule.py │ ├── utils_dataset.py │ └── FI │ │ └── FIDataBuilder.py ├── hyper_parameters.py ├── constants.py ├── utils │ ├── ultils_run.py │ ├── utils_generic.py │ └── util_training.py ├── settings.py └── lobcast.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | /thrash/ 3 | /data/ 4 | /data/experiments/ 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | argparse==1.1 2 | backtesting==0.3.3 3 | matplotlib==3.5.0 4 | numpy==1.18.5 5 | pandas==1.2.4 6 | plotly==4.14.3 7 | psutil==5.9.4 8 | pytorch_lightning==1.8.6 9 | scikit_learn==0.24.2 10 | seaborn==0.12.2 11 | torch==1.13.1 12 | tqdm==4.64.1 13 | wandb -------------------------------------------------------------------------------- /src/models/models_classes.py: -------------------------------------------------------------------------------- 1 | 2 | # MODELS 3 | import src.models.mlp.mlp as mlp 4 | import src.models.cnn1.cnn1 as cnn1 5 | import src.models.cnn2.cnn2 as cnn2 6 | import src.models.binctabl.binctabl as binctabl 7 | 8 | from enum import Enum 9 | 10 | 11 | class Models(Enum): 12 | MLP = mlp.MLP_lm 13 | CNN1 = cnn1.CNN_lm 14 | CNN2 = cnn2.CNN2_ml 15 | BINCTABL = binctabl.BinCTABL_ml 16 | # add new modules here 17 | -------------------------------------------------------------------------------- /src/models/model_callbacks.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | import src.constants as cst 3 | 4 | 5 | def callback_save_model(path, fname_root, metric, top_k=3): 6 | check_point_callback = pl.callbacks.ModelCheckpoint( 7 | monitor=metric, 8 | verbose=True, 9 | save_top_k=top_k, 10 | mode='max', 11 | dirpath=path, 12 | filename=fname_root + '_{epoch}-{' + metric + ':.2f}' 13 | ) 14 | return check_point_callback 15 | 16 | 17 | # TODO avoid early stopping 18 | -------------------------------------------------------------------------------- /src/models/lobcast_model.py: -------------------------------------------------------------------------------- 1 | 2 | import pytorch_lightning as pl 3 | from src.hyper_parameters import HPTunable 4 | 5 | 6 | class LOBCAST_module: 7 | def __init__(self, model, tunable_parameters=None): 8 | self.model = model 9 | self.tunable_parameters = tunable_parameters if tunable_parameters is not None else HPTunable() 10 | self.name = model.__class__.__name__ 11 | self.line_color = "red" 12 | self.line_shape = "-" 13 | 14 | 15 | class LOBCAST_model(pl.LightningModule): 16 | def __init__(self, input_dim, output_dim): 17 | super().__init__() 18 | self.input_dim = input_dim 19 | self.output_dim = output_dim 20 | -------------------------------------------------------------------------------- /src/batch_experiments/setup01.py: -------------------------------------------------------------------------------- 1 | 2 | from src.settings import SettingsExp 3 | import src.constants as 
cst 4 | 5 | # cartesian product of the tests 6 | INDEPENDENT_VARIABLES = { 7 | SettingsExp.SEED: [0], 8 | SettingsExp.PREDICTION_MODEL: [cst.Models.CNN1, cst.Models.CNN2], 9 | SettingsExp.PREDICTION_HORIZON_FUTURE: [10, 5], 10 | SettingsExp.PREDICTION_HORIZON_PAST: [1], 11 | SettingsExp.OBSERVATION_PERIOD: [100] 12 | } 13 | 14 | # no entries here = full cartesian product of INDEPENDENT_VARIABLES 15 | # an entry k: v keeps variable k fixed to value v while the other variables vary 16 | INDEPENDENT_VARIABLES_CONSTRAINTS = { 17 | SettingsExp.PREDICTION_MODEL: cst.Models.CNN1, # when the other variables vary, PREDICTION_MODEL = CNN1 18 | SettingsExp.PREDICTION_HORIZON_FUTURE: 5 19 | } 20 | -------------------------------------------------------------------------------- /src/run_batch.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import wandb 4 | from src.lobcast import LOBCAST 5 | from src.utils.ultils_run import grid_search_configurations, ExecutionPlan, wandb_init 6 | from src.settings import SettingsExp 7 | import src.constants as cst 8 | 9 | from src.batch_experiments import setup01 10 | from src.run import run_simulation 11 | 12 | 13 | def main(): 14 | sim = LOBCAST() 15 | 16 | # for multiple experiments 17 | ep = ExecutionPlan(setup01.INDEPENDENT_VARIABLES, 18 | setup01.INDEPENDENT_VARIABLES_CONSTRAINTS) 19 | 20 | setting_confs = ep.configurations() 21 | 22 | print("Running the following configurations:") 23 | print(setting_confs) 24 | 25 | for setting_conf in setting_confs: 26 | sim.update_settings(setting_conf) 27 | run_simulation(sim) 28 | print("done:", setting_conf) 29 | 30 | 31 | if __name__ == '__main__': 32 | main() 33 | 34 | 35 | # python -m src.run_batch 36 | -------------------------------------------------------------------------------- /src/models/utils_models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import src.constants as cst 4 | from src.utils.util_training import LOBCAST_NNEngine 5 | 6 | 7 | # MODELS 8 | from src.utils.utils_generic import get_class_arguments 9 | 10 | 11 | def get_tuned_parameters(sim, params): 12 | values = [sim.HP_TUNED.__getattribute__(p) for p in params] 13 | return values 14 | 15 | 16 | def pick_model(sim, data_module): 17 | loss_weights = None 18 | 19 | num_features = data_module.x_shape 20 | num_classes = data_module.num_classes 21 | 22 | args = get_class_arguments(sim.SETTINGS.PREDICTION_MODEL.value.model)[2:] 23 | args_values = get_tuned_parameters(sim, args) 24 | neural_architecture = sim.SETTINGS.PREDICTION_MODEL.value.model(num_features, num_classes, *args_values) 25 | 26 | engine = LOBCAST_NNEngine( 27 | neural_architecture, 28 | loss_weights, 29 | hps=sim.HP_TUNED, 30 | metrics_log=sim.METRICS, 31 | wandb_log=sim.WANDB_INSTANCE, 32 | ).to(sim.SETTINGS.DEVICE) 33 | 34 | return engine 35 | -------------------------------------------------------------------------------- /src/run.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import src.constants as cst 4 | import wandb 5 | from src.lobcast import LOBCAST 6 | from src.utils.ultils_run import grid_search_configurations, wandb_init 7 | from src.settings import SettingsExp 8 | 9 | 10 | def run_simulation(sim): 11 | if not sim.SETTINGS.IS_WANDB: 12 | # generates runs based on a grid search of the hyper params 13 | hparams_configs = grid_search_configurations(sim.HP_TUNABLE.__dict__) 14 | for hparams_config in hparams_configs: 15 | 
sim.update_hyper_parameters(hparams_config) 16 | sim.end_setup() 17 | sim.run() 18 | sim.evaluate() 19 | sim.close() 20 | else: 21 | # hyper params search is handled by wandb 22 | sweep_id, wandb_lunch = wandb_init(sim) 23 | wandb.agent(sweep_id, function=lambda: wandb_lunch(sim)) 24 | 25 | 26 | def main(): 27 | sim = LOBCAST() 28 | 29 | setting_conf = sim.parse_cl_arguments() 30 | sim.update_settings(setting_conf) 31 | run_simulation(sim) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | 37 | 38 | # python -m src.run --PREDICTION_MODEL MLP 39 | -------------------------------------------------------------------------------- /src/metrics/metrics_log.py: -------------------------------------------------------------------------------- 1 | 2 | from src.utils.utils_generic import write_json, is_jsonable 3 | from collections import defaultdict 4 | 5 | 6 | class Metrics: 7 | def __init__(self, path, fname_root): 8 | self.metrics = defaultdict(dict) # dict logged every X epochs 9 | self.path = path 10 | self.fname_root = fname_root 11 | self.is_best_model = False 12 | 13 | def add_metric(self, epoch, dataset_type, eval_dict): 14 | self.metrics[dataset_type][epoch] = eval_dict 15 | 16 | def reset_stats(self): 17 | self.metrics = defaultdict(dict) 18 | 19 | def dump_info(self, settings, h_parameters): 20 | print("Dumping config at", self.path) 21 | merged = {**settings, **h_parameters} 22 | merged = {k: (v if is_jsonable(v) else str(v)) for k, v in merged.items()} # make string unserializable vals 23 | write_json(merged, self.path + self.fname_root + "_" + "config.json") 24 | return merged 25 | 26 | def dump_metrics(self, fname): 27 | print("Dumping metrics at", self.path) 28 | write_json(self.metrics, self.path + self.fname_root + "_" + fname) 29 | return self.metrics 30 | -------------------------------------------------------------------------------- /src/data_preprocessing/dataModule.py: -------------------------------------------------------------------------------- 1 | 2 | import pytorch_lightning as pl 3 | from torch.utils.data import DataLoader 4 | import src.constants as cst 5 | 6 | 7 | class DataModule(pl.LightningDataModule): 8 | """ Splits the datasets in TRAIN, VALIDATION_MODEL, TEST. 
""" 9 | 10 | def __init__(self, train_set, val_set, test_set, batch_size, device, is_shuffle_train=True): 11 | super().__init__() 12 | 13 | self.train_set = train_set 14 | self.val_set = val_set 15 | self.test_set = test_set 16 | 17 | self.batch_size = batch_size 18 | self.is_shuffle_train = is_shuffle_train 19 | 20 | self.x_shape = self.test_set.x_shape 21 | self.num_classes = cst.NUM_CLASSES 22 | self.pin_memory = True if device == 'cuda' else False 23 | 24 | def setup(self, stage=None): 25 | pass 26 | 27 | def train_dataloader(self): 28 | return DataLoader(self.train_set, batch_size=self.batch_size, shuffle=self.is_shuffle_train, pin_memory=self.pin_memory, drop_last=False) 29 | 30 | def val_dataloader(self): 31 | return DataLoader(self.val_set, batch_size=self.batch_size, shuffle=False, pin_memory=self.pin_memory, drop_last=False) 32 | 33 | def test_dataloader(self): 34 | return DataLoader(self.test_set, batch_size=self.batch_size, shuffle=False, pin_memory=self.pin_memory, drop_last=False) 35 | -------------------------------------------------------------------------------- /src/hyper_parameters.py: -------------------------------------------------------------------------------- 1 | 2 | from src.utils.utils_generic import dict_to_string 3 | 4 | 5 | class Hyperparameters: 6 | def add_hyperparameters(self, params: dict): 7 | for key, value in params.items(): 8 | self.__setattr__(key, value) 9 | 10 | def add_hyperparameter(self, key, value): 11 | self.__setattr__(key, value) 12 | 13 | def __repr__(self): 14 | return dict_to_string(self.__dict__) 15 | 16 | 17 | class HPTuned(Hyperparameters): 18 | """ Tuned hyperparameters of the models. Hyperparameters are assigned with their chosen value 19 | by an external scheduler (e.g. wandb grid search).""" 20 | 21 | def update_hyperparameter(self, hp, value): 22 | try: 23 | self.__getattribute__(hp) 24 | self.__setattr__(hp, value) 25 | 26 | except AttributeError: 27 | raise AttributeError(f"This class has no {hp} to set.") 28 | 29 | 30 | class HPTunable(Hyperparameters): 31 | """ Tunable hyperparameters of the models. Contains the domains of hyperparameters exploration. 
""" 32 | def __init__(self): 33 | self.BATCH_SIZE = {"values": [32, 64]} # {"min": 0.0001, "max": 0.1} or {"values": [11]} 34 | self.LEARNING_RATE = {"values": [0.0001, 0.001, 0.01]} # {"min": 0.0001, "max": 0.1} # {"min": 0.0001, "max": 0.1} 35 | self.OPTIMIZER = {"values": ["SGD"]} 36 | -------------------------------------------------------------------------------- /src/models/mlp/mlp.py: -------------------------------------------------------------------------------- 1 | 2 | # Using Deep Learning to Detect Price Change Indications in Financial Markets 3 | # Source: https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8081663 4 | 5 | from torch import nn 6 | from src.models.lobcast_model import LOBCAST_model, LOBCAST_module 7 | from src.hyper_parameters import HPTunable 8 | 9 | 10 | class MLP(LOBCAST_model): 11 | def __init__( 12 | self, 13 | input_dim, 14 | output_dim, 15 | hidden_layer_dim, 16 | p_dropout 17 | ): 18 | super().__init__(input_dim, output_dim) 19 | 20 | flat_dims = self.input_dim[0] * self.input_dim[1] 21 | self.linear1 = nn.Linear(flat_dims, hidden_layer_dim) 22 | self.leakyReLU = nn.LeakyReLU() 23 | self.dropout = nn.Dropout(p=p_dropout) 24 | self.linear2 = nn.Linear(hidden_layer_dim, self.output_dim) 25 | 26 | def forward(self, x): 27 | # [batch_size x 40 x observation_length] 28 | x = x.view(x.size(0), -1).float() 29 | out = self.linear1(x) 30 | out = self.leakyReLU(out) 31 | out = self.dropout(out) 32 | out = self.linear2(out) 33 | return out 34 | 35 | 36 | class HP(HPTunable): 37 | def __init__(self): 38 | super().__init__() 39 | self.hidden_layer_dim = {"values": [128]} 40 | self.p_dropout = {"values": [.1, .5]} 41 | 42 | 43 | MLP_lm = LOBCAST_module(MLP, HP()) 44 | -------------------------------------------------------------------------------- /src/data_preprocessing/utils_dataset.py: -------------------------------------------------------------------------------- 1 | import src.constants as cst 2 | from src.data_preprocessing.dataModule import DataModule 3 | from src.data_preprocessing.FI.FIDataBuilder import FIDataset 4 | 5 | 6 | def prepare_data_fi(sim): 7 | fi_train, fi_val, fi_test = None, None, None 8 | 9 | if not sim.SETTINGS.IS_TEST_ONLY: 10 | fi_train = FIDataset( 11 | cst.DATASET_FI, 12 | dataset_type=cst.DatasetType.TRAIN, 13 | horizon=sim.SETTINGS.PREDICTION_HORIZON_FUTURE, 14 | observation_length=sim.SETTINGS.OBSERVATION_PERIOD, 15 | train_val_split=sim.SETTINGS.TRAIN_SET_PORTION, 16 | n_trends=sim.SETTINGS.N_TRENDS 17 | ) 18 | 19 | fi_val = FIDataset( 20 | cst.DATASET_FI, 21 | dataset_type=cst.DatasetType.VALIDATION, 22 | horizon=sim.SETTINGS.PREDICTION_HORIZON_FUTURE, 23 | observation_length=sim.SETTINGS.OBSERVATION_PERIOD, 24 | train_val_split=sim.SETTINGS.TRAIN_SET_PORTION, 25 | n_trends=sim.SETTINGS.N_TRENDS 26 | ) 27 | 28 | fi_test = FIDataset( 29 | cst.DATASET_FI, 30 | dataset_type=cst.DatasetType.TEST, 31 | observation_length=sim.SETTINGS.OBSERVATION_PERIOD, 32 | horizon=sim.SETTINGS.PREDICTION_HORIZON_FUTURE, 33 | train_val_split=sim.SETTINGS.TRAIN_SET_PORTION, 34 | n_trends=sim.SETTINGS.N_TRENDS 35 | ) 36 | 37 | fi_dm = DataModule( 38 | fi_train, fi_val, fi_test, 39 | sim.HP_TUNED.BATCH_SIZE, 40 | sim.SETTINGS.DEVICE, 41 | sim.SETTINGS.IS_SHUFFLE_TRAIN_SET 42 | ) 43 | return fi_dm 44 | 45 | 46 | def pick_dataset(sim): 47 | if sim.SETTINGS.DATASET_NAME == cst.DatasetFamily.FI: 48 | return prepare_data_fi(sim) 49 | else: 50 | raise ValueError(f"Unhandled dataset name: {sim.SETTINGS}") 51 | 
-------------------------------------------------------------------------------- /src/metrics/metrics_learning.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from sklearn.metrics import classification_report 4 | from sklearn.metrics import matthews_corrcoef 5 | from sklearn.metrics import cohen_kappa_score 6 | from sklearn.metrics import confusion_matrix 7 | 8 | import src.constants as cst 9 | import numpy as np 10 | 11 | 12 | def compute_metrics(truth, prediction, loss_vals): 13 | truth = torch.Tensor(truth) 14 | prediction = torch.Tensor(prediction) 15 | 16 | cr = classification_report(truth, prediction, output_dict=True, zero_division=0) 17 | accuracy = cr['accuracy'] # MICRO-F1 18 | 19 | f1score = cr['macro avg']['f1-score'] # MACRO-F1 20 | precision = cr['macro avg']['precision'] # MACRO-PRECISION 21 | recall = cr['macro avg']['recall'] # MACRO-RECALL 22 | 23 | f1score_w = cr['weighted avg']['f1-score'] # WEIGHTED-F1 24 | precision_w = cr['weighted avg']['precision'] # WEIGHTED-PRECISION 25 | recall_w = cr['weighted avg']['recall'] # WEIGHTED-RECALL 26 | 27 | mcc = matthews_corrcoef(truth, prediction) 28 | cok = cohen_kappa_score(truth, prediction) 29 | 30 | # y_actu = pd.Series(truth, name='actual') 31 | # y_pred = pd.Series(prediction, name='predicted') 32 | mat_confusion = confusion_matrix(truth, prediction) 33 | 34 | val_dict = { 35 | cst.Metrics.F1.value: float(f1score), 36 | cst.Metrics.F1_W.value: float(f1score_w), 37 | cst.Metrics.PRECISION.value: float(precision), 38 | cst.Metrics.PRECISION_W.value: float(precision_w), 39 | cst.Metrics.RECALL.value: float(recall), 40 | cst.Metrics.RECALL_W.value: float(recall_w), 41 | cst.Metrics.ACCURACY.value: float(accuracy), 42 | cst.Metrics.MCC.value: float(mcc), 43 | cst.Metrics.COK.value: float(cok), 44 | cst.Metrics.LOSS.value: float(np.sum(loss_vals)), 45 | cst.Metrics.CM.value: mat_confusion.tolist() 46 | } 47 | return val_dict 48 | -------------------------------------------------------------------------------- /src/models/binctabl/binctabl.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch 4 | from src.models.binctabl.base import BiN, BL_layer, TABL_layer 5 | 6 | from src.models.lobcast_model import LOBCAST_model, LOBCAST_module 7 | from src.hyper_parameters import HPTunable 8 | 9 | 10 | class BinCTABL(LOBCAST_model): 11 | def __init__(self, 12 | input_dim, 13 | output_dim, 14 | d2, d1, t1, t2, d3, t3, d4, t4): 15 | super().__init__(input_dim, output_dim) 16 | 17 | self.BiN = BiN(d2, d1, t1, t2) 18 | self.BL = BL_layer(d2, d1, t1, t2) 19 | self.BL2 = BL_layer(d3, d2, t2, t3) 20 | self.TABL = TABL_layer(d4, d3, t3, t4) 21 | self.dropout = nn.Dropout(0.1) 22 | 23 | def forward(self, x): 24 | # first of all we pass the input to the BiN layer, then we use the C(TABL) architecture 25 | x = torch.permute(x, (0, 2, 1)) 26 | 27 | x = self.BiN(x) 28 | 29 | self.max_norm_(self.BL.W1.data) 30 | self.max_norm_(self.BL.W2.data) 31 | x = self.BL(x) 32 | x = self.dropout(x) 33 | 34 | self.max_norm_(self.BL2.W1.data) 35 | self.max_norm_(self.BL2.W2.data) 36 | x = self.BL2(x) 37 | x = self.dropout(x) 38 | 39 | self.max_norm_(self.TABL.W1.data) 40 | self.max_norm_(self.TABL.W.data) 41 | self.max_norm_(self.TABL.W2.data) 42 | x = self.TABL(x) 43 | x = torch.squeeze(x) 44 | x = torch.softmax(x, 1) 45 | return x 46 | 47 | def max_norm_(self, w): 48 | with torch.no_grad(): 49 | if (torch.linalg.matrix_norm(w) > 
10.0): 50 | norm = torch.linalg.matrix_norm(w) 51 | desired = torch.clamp(norm, min=0.0, max=10.0) 52 | w *= (desired / (1e-8 + norm)) 53 | 54 | 55 | class HP(HPTunable): 56 | def __init__(self): 57 | super().__init__() 58 | self.d1 = {"values": [40]} 59 | self.d2 = {"values": [60]} 60 | self.d3 = {"values": [120]} 61 | self.d4 = {"values": [3]} 62 | 63 | self.t1 = {"values": [10]} 64 | self.t2 = {"values": [10]} 65 | self.t3 = {"values": [5]} 66 | self.t4 = {"values": [1]} 67 | 68 | 69 | BinCTABL_ml = LOBCAST_module(BinCTABL, HP()) 70 | -------------------------------------------------------------------------------- /src/metrics/report.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import src.constants as cst 3 | from src.utils.utils_generic import read_json 4 | 5 | 6 | saved_metrics = [ 7 | cst.Metrics.F1.value, 8 | cst.Metrics.F1_W.value, 9 | cst.Metrics.PRECISION.value, 10 | cst.Metrics.PRECISION_W.value, 11 | cst.Metrics.RECALL.value, 12 | cst.Metrics.RECALL_W.value, 13 | cst.Metrics.ACCURACY.value, 14 | cst.Metrics.MCC.value, 15 | cst.Metrics.COK.value, 16 | cst.Metrics.LOSS.value, 17 | ] 18 | 19 | 20 | def plot_metric_training(json_data_path, metric, pdf): 21 | json_data = read_json(json_data_path) 22 | 23 | # Extract data 24 | data_train = json_data[cst.ModelSteps.TRAINING.value] 25 | epochs_train = sorted(map(int, data_train.keys())) 26 | metric_values_train = [data_train[str(epoch)][metric] for epoch in epochs_train] 27 | 28 | data_val = json_data[cst.ModelSteps.VALIDATION.value] 29 | epochs_val = sorted(map(int, data_val.keys())) 30 | metric_values_val = [data_val[str(epoch)][metric] for epoch in epochs_val] 31 | 32 | # Plotting 33 | plt.figure(figsize=(5, 5)) 34 | plt.plot(epochs_train, metric_values_train, label=cst.ModelSteps.TRAINING.value, marker='.') 35 | plt.plot(epochs_val, metric_values_val, label=cst.ModelSteps.VALIDATION.value, marker='.') 36 | 37 | plt.title(f'{metric.capitalize()} vs. Epochs') 38 | plt.xlabel('Epochs') 39 | plt.ylabel(metric.capitalize()) 40 | plt.legend() 41 | plt.grid(True, alpha=0.2) 42 | 43 | if metric not in [cst.Metrics.LOSS.value, cst.Metrics.CM.value]: 44 | plt.ylim(-0.05, 1.05) 45 | 46 | plt.tight_layout() 47 | pdf.savefig(plt.gcf()) 48 | plt.close() 49 | 50 | 51 | def plot_metric_best(json_data_path, metric, pdf): 52 | json_data = read_json(json_data_path) 53 | 54 | # Extract data 55 | data_test = json_data[cst.ModelSteps.TESTING.value] 56 | epochs_test = sorted(map(int, data_test.keys())) 57 | metric_values_test = [data_test[str(epoch)][metric] for epoch in epochs_test] 58 | 59 | data_val = json_data["validation"] 60 | epochs_val = sorted(map(int, data_val.keys())) 61 | metric_values_val = [data_val[str(epoch)][metric] for epoch in epochs_val] 62 | 63 | # Plotting 64 | plt.figure(figsize=(5, 5)) 65 | plt.bar([cst.ModelSteps.TESTING.value, cst.ModelSteps.VALIDATION.value], metric_values_test + metric_values_val, color=['blue', 'green']) 66 | 67 | plt.title(f'{metric.capitalize()} vs. 
Epochs') 68 | plt.xlabel('Epochs') 69 | plt.ylabel(metric.capitalize()) 70 | plt.grid(True, alpha=0.2) 71 | 72 | if metric not in [cst.Metrics.LOSS.value, cst.Metrics.CM.value]: 73 | plt.ylim(-0.05, 1.05) 74 | 75 | plt.tight_layout() 76 | pdf.savefig(plt.gcf()) 77 | plt.close() -------------------------------------------------------------------------------- /src/constants.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class Optimizers(Enum): 5 | ADAM = "Adam" 6 | RMSPROP = "RMSprop" 7 | SGD = "SGD" 8 | 9 | 10 | class Metrics(Enum): 11 | LOSS = 'loss' 12 | CM = 'cm' 13 | F1 = 'f1' 14 | F1_W = 'f1_w' 15 | 16 | PRECISION = 'precision' 17 | PRECISION_W = 'precision_w' 18 | 19 | RECALL = 'recall' 20 | RECALL_W = 'recall_w' 21 | 22 | ACCURACY = 'accuracy' 23 | MCC = 'mcc' 24 | COK = 'cohen-k' 25 | 26 | 27 | class ModelSteps(Enum): 28 | TRAINING = "training" 29 | VALIDATION = "validation" # final validation 30 | TESTING = "testing" 31 | 32 | 33 | VALIDATION_METRIC = "{}_{}".format(ModelSteps.VALIDATION.value, Metrics.F1.value) 34 | 35 | 36 | class NormalizationType(Enum): 37 | Z_SCORE = 0 38 | DYNAMIC = 1 39 | NONE = 2 40 | MINMAX = 3 41 | DECPRE = 4 42 | 43 | 44 | class FI_Horizons(Enum): 45 | K1 = 1 46 | K2 = 2 47 | K3 = 3 48 | K5 = 5 49 | K10 = 10 50 | 51 | 52 | class Predictions(Enum): 53 | DOWNWARD = 0 54 | STATIONARY = 1 55 | UPWARD = 2 56 | 57 | 58 | from src.models.models_classes import * 59 | # to use in the future 60 | 61 | 62 | class DatasetFamily(str, Enum): 63 | FI = "FI" 64 | LOB = "Lobster" 65 | META = "Meta" 66 | 67 | 68 | HORIZONS_MAPPINGS_FI = { 69 | 1: -5, 70 | 2: -4, 71 | 3: -3, 72 | 5: -2, 73 | 10: -1 74 | } 75 | 76 | HORIZONS_MAPPINGS_LOBSTER = { 77 | 10: -5, 78 | 20: -4, 79 | 30: -3, 80 | 50: -2, 81 | 100: -1 82 | } 83 | 84 | 85 | class OrderEvent(Enum): 86 | """ The possible kind of orders in the lob """ 87 | SUBMISSION = 1 88 | CANCELLATION = 2 89 | DELETION = 3 90 | EXECUTION = 4 91 | HIDDEN_EXECUTION = 5 92 | CROSS_TRADE = 6 93 | TRADING_HALT = 7 94 | OTHER = 8 95 | 96 | 97 | class DatasetType(Enum): 98 | TRAIN = "train" 99 | TEST = "test" 100 | VALIDATION = "val" 101 | 102 | 103 | DOWNLOAD_FI_COMMAND = ("wget --content-disposition \"https://download.fairdata.fi:443/download?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MTEyMzAxODksImRhdGFzZXQiOiI3M2ViNDhkNy00ZGJjLTRhMTAtYTUyYS1kYTc0NWI0N2E2NDkiLCJwYWNrYWdlIjoiNzNlYjQ4ZDctNGRiYy00YTEwLWE1MmEtZGE3NDViNDdhNjQ5X2JoeXV4aWZqLnppcCIsImdlbmVyYXRlZF9ieSI6IjlmZGRmZmVlLWY4ZDItNDZkNS1hZmIwLWQyOTM0NzdlZjg2ZiIsInJhbmRvbV9zYWx0IjoiYjVkYzQxOTAifQ.bgDP51aFumRtPMbJUtUcjhpnu-O6nI6OYZlDbc3lrfQ\"") 104 | 105 | 106 | class ExpIndependentVariables(Enum): 107 | MODEL = 'model' 108 | K_FI = 'k' 109 | FORWARD_WIN = 'fw' 110 | BACKWARD_WIN = 'bw' 111 | 112 | 113 | N_LOB_LEVELS = 10 114 | NUM_CLASSES = 3 115 | 116 | PROJECT_NAME = "LOBCAST" 117 | VERSION = 2.0 118 | 119 | PROJECT_NAME_VERSION = f"{PROJECT_NAME}-v{VERSION}" 120 | DIR_EXPERIMENTS = f"data/experiments/{PROJECT_NAME_VERSION}" 121 | DIR_SAVED_MODEL = f"data/saved_models/{PROJECT_NAME_VERSION}" 122 | DATASET_FI = "data/datasets/FI-2010/BenchmarkDatasets/" 123 | 124 | METRICS_RUNNING_FILE_NAME = "metrics_train.json" 125 | METRICS_BEST_FILE_NAME = "metrics_best.json" 126 | WANDB_SWEEP_MAX_RUNS = 20 127 | 128 | 129 | class UnitHorizon(Enum): 130 | SECONDS = "seconds" 131 | HOURS = "hours" 132 | MINUTES = "minutes" 133 | DAYS = "days" 134 | EVENTS = "events" 135 | 
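136 | 137 | # Illustrative note on the horizon mappings above: FI-2010 stores the labels for horizons 138 | # k in {1, 2, 3, 5, 10} in the last five rows of its data matrix, so for example 139 | # HORIZONS_MAPPINGS_FI[5] # -> -2, the second-to-last row, read by FIDataset.__prepare_y as data[HORIZONS_MAPPINGS_FI[horizon], :]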
-------------------------------------------------------------------------------- /src/models/cnn1/cnn1.py: -------------------------------------------------------------------------------- 1 | # Forecasting Stock Prices from the Limit Order Book using Convolutional Neural Networks 2 | # Source: https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8010701 3 | 4 | import pytorch_lightning as pl 5 | from torch import nn 6 | 7 | import src.models.lobcast_model 8 | from src.models.lobcast_model import LOBCAST_model, LOBCAST_module 9 | from src.hyper_parameters import HPTunable 10 | 11 | 12 | class CNN1(LOBCAST_model): 13 | 14 | def __init__(self, input_dim, output_dim): 15 | super().__init__(input_dim, output_dim) 16 | 17 | n_features = input_dim[1] 18 | 19 | # Convolution 1 20 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(4, n_features), padding=(3, 0), dilation=(2, 1)) 21 | self.relu1 = nn.LeakyReLU() 22 | 23 | # Convolution 2 24 | self.conv2 = nn.Conv1d(in_channels=16, out_channels=16, kernel_size=(4,)) 25 | self.relu2 = nn.LeakyReLU() 26 | 27 | # Max pool 1 28 | self.maxpool1 = nn.MaxPool1d(kernel_size=2) 29 | 30 | # Convolution 3 31 | self.conv3 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=(3,), padding=2) 32 | self.relu3 = nn.LeakyReLU() 33 | 34 | # Convolution 4 35 | self.conv4 = nn.Conv1d(in_channels=32, out_channels=32, kernel_size=(3,), padding=2) 36 | self.relu4 = nn.LeakyReLU() 37 | 38 | # Max pool 2 39 | self.maxpool2 = nn.MaxPool1d(kernel_size=2) 40 | 41 | # Fully connected 1 42 | self.fc1 = nn.Linear(26*32, 32) 43 | self.relu5 = nn.LeakyReLU() 44 | 45 | # Fully connected 2 46 | self.fc2 = nn.Linear(32, output_dim) 47 | 48 | def forward(self, x): 49 | # Adding the channel dimension 50 | x = x[:, None, :] # x.shape = [batch_size, 1, 100, 40] 51 | 52 | # print('x.shape:', x.shape) 53 | 54 | # Convolution 1 55 | out = self.conv1(x) 56 | out = self.relu1(out) 57 | out = out.reshape(out.shape[0], out.shape[1], -1) 58 | # print('After convolution1:', out.shape) 59 | 60 | # Convolution 2 61 | out = self.conv2(out) 62 | out = self.relu2(out) 63 | # print('After convolution2:', out.shape) 64 | 65 | # Max pool 1 66 | out = self.maxpool1(out) 67 | # print('After maxpool1:', out.shape) 68 | 69 | # Convolution 3 70 | out = self.conv3(out) 71 | out = self.relu3(out) 72 | # print('After convolution3:', out.shape) 73 | 74 | # Convolution 4 75 | out = self.conv4(out) 76 | out = self.relu4(out) 77 | # print('After convolution4:', out.shape) 78 | 79 | # Max pool 2 80 | out = self.maxpool2(out) 81 | # print('After maxcpool2:', out.shape) 82 | 83 | # flatten 84 | out = out.view(out.size(0), -1) 85 | # print('After flatten:', out.shape) 86 | 87 | # Linear function 1 88 | out = self.fc1(out) 89 | out = self.relu5(out) 90 | # print('After linear1:', out.shape) 91 | 92 | # Linear function (readout) 93 | out = self.fc2(out) 94 | # print('After linear2:', out.shape) 95 | 96 | return out 97 | 98 | 99 | CNN_lm = LOBCAST_module(CNN1) 100 | -------------------------------------------------------------------------------- /src/utils/ultils_run.py: -------------------------------------------------------------------------------- 1 | import wandb 2 | import src.constants as cst 3 | import itertools 4 | 5 | 6 | def wandb_init(sim): 7 | def wandb_lunch(sim): # runs multiple instances 8 | with wandb.init() as wandb_instance: 9 | sim.update_hyper_parameters(wandb_instance.config) 10 | sim.end_setup(wandb_instance) 11 | 12 | wandb_instance.log({k: str(v) for k, v in 
sim.SETTINGS.__dict__.items()}) 13 | sim.run() 14 | sim.evaluate() 15 | sim.close() 16 | 17 | sweep_id = wandb.sweep(project=cst.PROJECT_NAME_VERSION, sweep={ 18 | 'method': sim.SETTINGS.WANDB_SWEEP_METHOD, 19 | "metric": {"goal": "maximize", "name": cst.VALIDATION_METRIC}, 20 | 'parameters': sim.HP_TUNABLE.__dict__, 21 | 'description': str(sim.SETTINGS) + str(sim.HP_TUNABLE), 22 | }) 23 | return sweep_id, wandb_lunch 24 | 25 | 26 | def grid_search_configurations(tunable_variables, n_steps=3): 27 | """ Given a set of parameters to tune of the form 28 | 29 | { p1: {"values": [v1, v2, v3]}, 30 | p2: {"max": 1, "min": 0}, ... } 31 | 32 | returns the configurations associated with a grid search in the form: 33 | [ {p1:v1, p2:v1}, {p1:v1, v2}, ... ] 34 | """ 35 | all_domains = [] 36 | for name, domain in tunable_variables.items(): 37 | # continuous variable 38 | if 'min' in domain: 39 | step = (domain['max'] - domain['min']) / n_steps 40 | all_domains += [[domain['min'] + step * i for i in range(n_steps)]] 41 | print(f"Warning! Param {name} domain {domain} was discretized! In {n_steps} steps as {all_domains}.") 42 | 43 | # discrete variable 44 | elif 'values' in domain: 45 | all_domains += [domain['values']] 46 | configurations_tuples = itertools.product(*all_domains) 47 | 48 | # from tuples [(v1, v2, v3)] to [{p1: v1}, ...] 49 | configurations_dicts = [{k: v for k, v in zip(tunable_variables.keys(), selected_values)} for selected_values in configurations_tuples] 50 | return configurations_dicts 51 | 52 | 53 | class ExecutionPlan: 54 | def __init__(self, plan, constraints): 55 | self.plan = plan 56 | self.constraints = constraints 57 | 58 | def configurations(self): 59 | """ 60 | Generate configurations based on the execution plan and constraints. 61 | Returns: list: A list of dictionaries representing configurations for LOBCAST Settings, 62 | where keys are variable names and values are the corresponding values. 
63 | """ 64 | all_domains = [list(dom) for dom in self.plan.values()] 65 | configurations_attempts = list(itertools.product(*all_domains)) 66 | 67 | chosen_configurations = set(configurations_attempts) 68 | if len(self.constraints) > 0: 69 | chosen_configurations = set() 70 | for fixed_var, fixed_value in self.constraints.items(): 71 | for configuration in configurations_attempts: 72 | vf_index = list(self.plan.keys()).index(fixed_var) 73 | if configuration[vf_index] == fixed_value: 74 | chosen_configurations |= {configuration} 75 | 76 | out_con = [] 77 | for co_tup in chosen_configurations: 78 | co_dic = {k.value: co_tup[i] for i, k in enumerate(self.plan.keys())} 79 | out_con.append(co_dic) 80 | return out_con 81 | -------------------------------------------------------------------------------- /src/models/cnn2/cnn2.py: -------------------------------------------------------------------------------- 1 | # Using Deep Learning for price prediction by exploiting stationary limit order book features 2 | # Source: https://www.sciencedirect.com/science/article/pii/S1568494620303410 3 | 4 | import pytorch_lightning as pl 5 | from torch import nn 6 | 7 | from src.models.lobcast_model import LOBCAST_model, LOBCAST_module 8 | from src.hyper_parameters import HPTunable 9 | 10 | 11 | class CNN2(LOBCAST_model): 12 | def __init__(self, input_dim, output_dim): 13 | super().__init__(input_dim, output_dim) 14 | 15 | # Convolution 1 16 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(10, 42), padding=(0, 2)) 17 | self.bn1 = nn.BatchNorm2d(16) 18 | self.prelu1 = nn.PReLU() 19 | 20 | # Convolution 2 21 | self.conv2 = nn.Conv1d(in_channels=16, out_channels=16, kernel_size=(10,)) # 3 22 | self.bn2 = nn.BatchNorm1d(16) 23 | self.prelu2 = nn.PReLU() 24 | 25 | # Convolution 3 26 | self.conv3 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=(8,)) # 1 27 | self.bn3 = nn.BatchNorm1d(32) 28 | self.prelu3 = nn.PReLU() 29 | 30 | # Convolution 4 31 | self.conv4 = nn.Conv1d(in_channels=32, out_channels=32, kernel_size=(6,)) # 1 32 | self.bn4 = nn.BatchNorm1d(32) 33 | self.prelu4 = nn.PReLU() 34 | 35 | # Convolution 5 36 | self.conv5 = nn.Conv1d(in_channels=32, out_channels=32, kernel_size=(4,)) # 1 37 | self.bn5 = nn.BatchNorm1d(32) 38 | self.prelu5 = nn.PReLU() 39 | 40 | # Fully connected 1 41 | self.fc1 = nn.Linear(249 * 32, 32) 42 | self.prelu6 = nn.PReLU() 43 | 44 | # Fully connected 2 45 | self.fc2 = nn.Linear(32, output_dim) 46 | 47 | def forward(self, x): 48 | # Adding the channel dimension 49 | x = x[:, None, :] # x.shape = [batch_size, 1, 100, 40] 50 | 51 | # print('x.shape:', x.shape) 52 | 53 | # Convolution 1 54 | out = self.conv1(x) 55 | # print('After convolution1:', out.shape) 56 | 57 | out = self.bn1(out) 58 | # print('After bn1:', out.shape) 59 | 60 | out = self.prelu1(out) 61 | out = out.reshape(out.shape[0], out.shape[1], -1) 62 | # print('After prelu1:', out.shape) 63 | 64 | # Convolution 2 65 | out = self.conv2(out) 66 | out = self.bn2(out) 67 | out = self.prelu2(out) 68 | # print('After convolution2, bn2, prelu2:', out.shape) 69 | 70 | # Convolution 3 71 | out = self.conv3(out) 72 | out = self.bn3(out) 73 | out = self.prelu3(out) 74 | # print('After convolution3, bn3, prelu3:', out.shape) 75 | 76 | # Convolution 4 77 | out = self.conv4(out) 78 | out = self.bn4(out) 79 | out = self.prelu4(out) 80 | # print('After convolution4, bn4, prelu4:', out.shape) 81 | 82 | # Convolution 5 83 | out = self.conv5(out) 84 | out = self.bn5(out) 85 | out = self.prelu5(out) 86 | # 
print('After convolution5, bn5, prelu5:', out.shape) 87 | 88 | # flatten 89 | out = out.view(out.size(0), -1) 90 | # print('After flatten:', out.shape) 91 | 92 | # Linear function 1 93 | out = self.fc1(out) 94 | out = self.prelu6(out) 95 | # print('After fc1:', out.shape) 96 | 97 | # Linear function (readout) 98 | out = self.fc2(out) 99 | # print('After fc2:', out.shape) 100 | 101 | return out 102 | 103 | 104 | CNN2_ml = LOBCAST_module(CNN2, tunable_parameters=None) 105 | -------------------------------------------------------------------------------- /src/settings.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import numpy as np 4 | import torch 5 | import src.constants as cst 6 | import multiprocessing 7 | 8 | import src.settings 9 | 10 | np.set_printoptions(suppress=True) 11 | from src.utils.utils_generic import dict_to_string 12 | from enum import Enum 13 | 14 | 15 | class SettingsExp(Enum): 16 | SEED = "SEED" 17 | PREDICTION_MODEL = "PREDICTION_MODEL" 18 | PREDICTION_HORIZON_FUTURE = "PREDICTION_HORIZON_FUTURE" 19 | PREDICTION_HORIZON_PAST = "PREDICTION_HORIZON_PAST" 20 | OBSERVATION_PERIOD = "OBSERVATION_PERIOD" 21 | 22 | 23 | class Settings: 24 | """ A class with all the settings of the simulations. Settings are set at runtime from command line. """ 25 | def __init__(self): 26 | 27 | self.SEED: int = 0 28 | """ The random seed of the simulation. """ 29 | 30 | self.DATASET_NAME: cst.DatasetFamily = cst.DatasetFamily.FI 31 | """ Name of the dataset to run tests on. """ 32 | 33 | self.N_TRENDS = 3 34 | """ The number of trends to use for predictions. """ 35 | 36 | self.PREDICTION_MODEL = cst.Models.MLP 37 | self.PREDICTION_HORIZON_UNIT: cst.UnitHorizon = cst.UnitHorizon.EVENTS 38 | """ The time unit for time series discretization. """ 39 | 40 | self.PREDICTION_HORIZON_FUTURE: int = 5 41 | self.PREDICTION_HORIZON_PAST: int = 1 42 | self.OBSERVATION_PERIOD: int = 100 43 | self.IS_SHUFFLE_TRAIN_SET = True 44 | 45 | self.EPOCHS_UB = 30 46 | """ The number of training epochs. """ 47 | 48 | self.TRAIN_SET_PORTION = .8 49 | self.VALIDATION_EVERY = 1 50 | 51 | self.IS_TEST_ONLY = False 52 | """ Whether or not to run the simulation in test mode. If True, no train or validation are performed. """ 53 | 54 | self.TEST_MODEL_PATH: str = "data/saved_models/LOBCAST-(15-03-2024_20-23-49)/epoch=2-validation_f1=0.27.ckpt" 55 | """ The path to the model to test. """ 56 | 57 | self.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' 58 | self.N_GPUs = None if self.DEVICE == 'cpu' else torch.cuda.device_count() 59 | self.N_CPUs = multiprocessing.cpu_count() 60 | 61 | self.DIR_EXPERIMENTS = "" 62 | self.IS_WANDB = True 63 | self.WANDB_SWEEP_METHOD = 'grid' 64 | """ Whether or not to use wandb. """ 65 | 66 | self.IS_SANITY_CHECK = False 67 | """ Whether or not to use sanity checks. """ 68 | 69 | def check_parameters_validity(self): 70 | """ Checks if the parameters set at runtime are valid. 
""" 71 | CONSTRAINTS = [] 72 | c1 = (not self.IS_TEST_ONLY or os.path.exists(self.TEST_MODEL_PATH), "If IS_TEST_ONLY, then test model should exist.") 73 | 74 | c2 = (not self.DATASET_NAME == cst.DatasetFamily.FI or self.PREDICTION_HORIZON_UNIT == cst.UnitHorizon.EVENTS, 75 | f"FI-2010 Dataset can handle only event based granularity, {self.PREDICTION_HORIZON_UNIT} given.") 76 | 77 | c3 = (not self.DATASET_NAME == cst.DatasetFamily.FI or self.PREDICTION_HORIZON_PAST == 1, 78 | f"FI-2010 Dataset can handle only 1 event in the past horizon, {self.PREDICTION_HORIZON_PAST} given.") 79 | 80 | c4 = (not self.DATASET_NAME == cst.DatasetFamily.FI or self.PREDICTION_HORIZON_FUTURE in [1, 2, 3, 5, 10], 81 | f"FI-2010 Dataset can handle only {1, 2, 3, 5, 10} events in the future horizon, {self.PREDICTION_HORIZON_FUTURE} given.") 82 | 83 | c5 = (not self.DATASET_NAME == cst.DatasetFamily.FI or self.N_TRENDS == 3, 84 | f"FI-2010 Dataset can handle only 3 trends, {self.N_TRENDS} given.") 85 | 86 | c6 = (not self.PREDICTION_MODEL == cst.Models.BINCTABL or self.OBSERVATION_PERIOD == 10, 87 | f"At the moment, BINCTABL only allows OBSERVATION_PERIOD = 10, {self.OBSERVATION_PERIOD} given.") 88 | 89 | CONSTRAINTS += [c1, c2, c3, c4, c5, c6] 90 | for constrain, description in CONSTRAINTS: 91 | if not constrain: 92 | raise ValueError(f"Constraint not met! {description} Check your parameters.") 93 | 94 | def __repr__(self): 95 | return dict_to_string(self.__dict__) 96 | -------------------------------------------------------------------------------- /src/utils/utils_generic.py: -------------------------------------------------------------------------------- 1 | 2 | import pickle 3 | import os 4 | import json 5 | import platform, socket, re, uuid, psutil, logging 6 | import matplotlib.pyplot as plt 7 | 8 | import inspect 9 | 10 | 11 | def get_class_arguments(obj): 12 | signature = inspect.signature(obj) 13 | parameters = signature.parameters 14 | arguments = [param.name for param in parameters.values()] # list of arguments 15 | return arguments 16 | 17 | import src.constants as cst 18 | 19 | 20 | def read_data(fname): 21 | with open(fname, 'rb') as handle: 22 | out_df = pickle.load(handle) 23 | return out_df 24 | 25 | 26 | def write_data(data, path, fname): 27 | with open(path + fname, 'wb') as handle: 28 | os.makedirs(path, exist_ok=True) 29 | pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL) 30 | 31 | 32 | def write_json(msg, fname): 33 | with open(fname, 'w') as fp: 34 | json.dump(msg, fp) 35 | 36 | 37 | def read_json(fname): 38 | data = None 39 | if os.path.exists(fname): 40 | with open(fname, 'r') as fp: 41 | data = json.load(fp) 42 | else: 43 | print("File", fname, "does not exist.") 44 | return data 45 | 46 | 47 | def is_jsonable(x): 48 | try: 49 | json.dumps(x) 50 | return True 51 | except (TypeError, OverflowError): 52 | return False 53 | 54 | 55 | def get_sys_info(): 56 | info = dict() 57 | info['platform'] = platform.system() 58 | info['platform-release'] = platform.release() 59 | info['platform-version'] = platform.version() 60 | info['architecture'] = platform.machine() 61 | info['hostname'] = socket.gethostname() 62 | info['ip-address'] = socket.gethostbyname(socket.gethostname()) 63 | info['mac-address'] = ':'.join(re.findall('..', '%012x' % uuid.getnode())) 64 | info['processor'] = platform.processor() 65 | info['ram'] = str(round(psutil.virtual_memory().total / (1024.0 **3)))+" GB" 66 | print(info) 67 | 68 | 69 | def get_sys_mac(): 70 | return ':'.join(re.findall('..', '%012x' % 
uuid.getnode())) 71 | 72 | 73 | def get_index_from_window(config): 74 | if config.DATASET_NAME == cst.DatasetFamily.FI: 75 | return cst.HORIZONS_MAPPINGS_FI[config.HYPER_PARAMETERS[cst.LearningHyperParameter.FI_HORIZON]] 76 | elif config.DATASET_NAME == cst.DatasetFamily.LOB: 77 | return cst.HORIZONS_MAPPINGS_LOBSTER[config.HYPER_PARAMETERS[cst.LearningHyperParameter.FORWARD_WINDOW.value]] 78 | 79 | 80 | def sample_color(index, cmap='tab10'): 81 | # 1. Choose your desired colormap 82 | cmap = plt.get_cmap(cmap) 83 | 84 | # 2. Segmenting the whole range (from 0 to 1) of the color map into multiple segments 85 | colors = [cmap(x) for x in range(cmap.N)] 86 | assert index < cmap.N 87 | 88 | # 3. Color the i-th line with the i-th color, i.e. slicedCM[i] 89 | color = colors[index] 90 | return color 91 | 92 | 93 | def sample_marker(index): 94 | MARKERS = ["s", "p", "P", "*", "h", "H", "+", "x", "X", "D", "d", "|", "_", 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ".", ",", "o", "v", "^", "<", ">", "1", "2", "3", "4", "8"] 95 | return MARKERS[index] 96 | 97 | 98 | def sample_pattern(index): 99 | MARKERS = ['/', '\\', '|', '-', '+', 'x', 'o', 'O', '.', '*'] + ['/o', '\\|', '|*', '-\\', '+o', 'x*', 'o-', 'O|', 'O.', '*-'] 100 | return MARKERS[index] 101 | 102 | 103 | def sample_line(index): 104 | MARKERS = ['-', '--', '-.', ':', 'None', ' ', '', 'solid', 'dashed', 'dashdot', 'dotted', 'loosely dotted', 'densely dotted', 'loosely dashed', 'densely dashed', 'loosely dashdotted', 'densely dashdotted', 'loosely dashdotdotted', 'dashdotdotted', 'densely dashdotdotted'] 105 | return MARKERS[index] 106 | 107 | 108 | def make_dir(path): 109 | if not os.path.exists(path): 110 | os.makedirs(path) 111 | 112 | 113 | def dict_to_string(dictionary): 114 | rep = "" 115 | for key, value in dictionary.items(): 116 | rep += f"{key}: {value}\n" 117 | return rep 118 | 119 | 120 | def str_to_bool(string: str): 121 | string = string.lower() 122 | return string in ['true', '1', 't', 'y', 'yes'] 123 | 124 | 125 | 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /src/utils/util_training.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import pytorch_lightning as pl 4 | 5 | import torch 6 | import numpy as np 7 | import torch.nn as nn 8 | import src.constants as cst 9 | from src.metrics.metrics_learning import compute_metrics 10 | 11 | 12 | class LOBCAST_NNEngine(pl.LightningModule): 13 | def __init__(self, neural_architecture, loss_weights, hps, metrics_log, wandb_log): 14 | super().__init__() 15 | self.neural_architecture = neural_architecture 16 | self.loss_weights = loss_weights 17 | self.hps = hps 18 | self.metrics_log = metrics_log 19 | self.wandb_log = wandb_log 20 | 21 | def log_wandb(self, metrics): 22 | if self.wandb_log: 23 | self.wandb_log.log(metrics) 24 | 25 | def forward(self, batch): 26 | # time x features - 40 x 100 in general 27 | out = self.neural_architecture(batch) 28 | logits = nn.Softmax(dim=1)(out) # todo check if within model 29 | return out, logits 30 | 31 | def training_step(self, batch, batch_idx): 32 | prediction_ind, y, loss_val, logits = self.make_predictions(batch) 33 | return {"loss": loss_val, "other": (prediction_ind, y, loss_val, logits)} 34 | 35 | def validation_step(self, batch, batch_idx): 36 | prediction_ind, y, loss_val, logits = self.make_predictions(batch) 37 | return prediction_ind, y, loss_val, logits 38 | 39 | def test_step(self, batch, batch_idx): 40 | prediction_ind, y, 
loss_val, logits = self.make_predictions(batch) 41 | return prediction_ind, y, loss_val, logits 42 | 43 | def make_predictions(self, batch): 44 | x, y = batch 45 | out, logits = self(x) 46 | loss_val = nn.CrossEntropyLoss(self.loss_weights)(out, y) 47 | 48 | # deriving prediction from softmax probs 49 | prediction_ind = torch.argmax(logits, dim=1) # B 50 | return prediction_ind, y, loss_val, logits 51 | 52 | def predict_step(self, batch, batch_idx, dataloader_idx=0): 53 | x, _ = batch 54 | t0 = time.time() 55 | self(x) 56 | torch.cuda.current_stream().synchronize() 57 | t1 = time.time() 58 | elapsed = t1 - t0 59 | print("Inference for the model:", elapsed, "ms") 60 | return elapsed 61 | 62 | def evaluate_classifier(self, stp_type, step_outputs): 63 | preds, truths, loss_vals, logits = self.__get_prediction_vectors(step_outputs) 64 | eval_dict = compute_metrics(truths, preds, loss_vals) 65 | 66 | var_name = "{}_{}".format(stp_type, cst.Metrics.LOSS.value) 67 | self.log(var_name, eval_dict[cst.Metrics.LOSS.value], prog_bar=True) 68 | 69 | var_name = "{}_{}".format(stp_type, cst.Metrics.F1.value) 70 | self.log(var_name, eval_dict[cst.Metrics.F1.value], prog_bar=True) 71 | 72 | path = cst.METRICS_BEST_FILE_NAME if self.metrics_log.is_best_model else cst.METRICS_RUNNING_FILE_NAME 73 | 74 | print("\n") 75 | print(f"END epoch {self.current_epoch} ({stp_type})") 76 | print("Logging stats...") 77 | self.metrics_log.add_metric(self.current_epoch, stp_type, eval_dict) 78 | self.metrics_log.dump_metrics(path) 79 | self.log_wandb({f"{stp_type}_{k}": v for k, v in eval_dict.items()}) 80 | print("Done.") 81 | 82 | def training_epoch_end(self, training_step_outputs): 83 | training_step_outputs = [batch["other"] for batch in training_step_outputs] 84 | self.evaluate_classifier(cst.ModelSteps.TRAINING.value, training_step_outputs) 85 | 86 | def validation_epoch_end(self, validation_step_outputs): 87 | self.evaluate_classifier(cst.ModelSteps.VALIDATION.value, validation_step_outputs) 88 | 89 | def test_epoch_end(self, test_step_outputs): 90 | self.evaluate_classifier(cst.ModelSteps.TESTING.value, test_step_outputs) 91 | 92 | def __get_prediction_vectors(self, model_output): 93 | """ Accumulates the models output after each validation and testing epoch end. 
""" 94 | 95 | preds, truths, losses, logits = [], [], [], [] 96 | for preds_b, y_b, loss_val, logits_b in model_output: 97 | preds += preds_b.tolist() 98 | truths += y_b.tolist() 99 | logits += logits_b.tolist() 100 | losses += [loss_val.item()] # loss is single per batch 101 | 102 | preds = np.array(preds) 103 | truths = np.array(truths) 104 | logits = np.array(logits) 105 | losses = np.array(losses) 106 | 107 | return preds, truths, losses, logits 108 | 109 | def configure_optimizers(self): 110 | if self.hps.OPTIMIZER == "SGD": 111 | return torch.optim.SGD(self.parameters(), lr=self.hps.LEARNING_RATE) 112 | elif self.hps.OPTIMIZER == "ADAM": 113 | return torch.optim.Adam(self.parameters(), lr=self.hps.LEARNING_RATE) 114 | elif self.hps.OPTIMIZER == "RMSPROP": 115 | return torch.optim.RMSprop(self.parameters(), lr=self.hps.LEARNING_RATE) 116 | -------------------------------------------------------------------------------- /src/models/binctabl/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class BiN(nn.Module): 6 | def __init__(self, d2, d1, t1, t2): 7 | super().__init__() 8 | self.t1 = t1 9 | self.d1 = d1 10 | self.t2 = t2 11 | self.d2 = d2 12 | 13 | bias1 = torch.Tensor(t1, 1) 14 | self.B1 = nn.Parameter(bias1) 15 | nn.init.constant_(self.B1, 0) 16 | 17 | l1 = torch.Tensor(t1, 1) 18 | self.l1 = nn.Parameter(l1) 19 | nn.init.xavier_normal_(self.l1) 20 | 21 | bias2 = torch.Tensor(d1, 1) 22 | self.B2 = nn.Parameter(bias2) 23 | nn.init.constant_(self.B2, 0) 24 | 25 | l2 = torch.Tensor(d1, 1) 26 | self.l2 = nn.Parameter(l2) 27 | nn.init.xavier_normal_(self.l2) 28 | 29 | y1 = torch.Tensor(1, ) 30 | self.y1 = nn.Parameter(y1) 31 | nn.init.constant_(self.y1, 0.5) 32 | 33 | y2 = torch.Tensor(1, ) 34 | self.y2 = nn.Parameter(y2) 35 | nn.init.constant_(self.y2, 0.5) 36 | 37 | def forward(self, x): 38 | 39 | # if the two scalars are negative then we setting them to 0 40 | if (self.y1[0] < 0): 41 | y1 = torch.cuda.FloatTensor(1, ) 42 | self.y1 = nn.Parameter(y1) 43 | nn.init.constant_(self.y1, 0.01) 44 | 45 | if (self.y2[0] < 0): 46 | y2 = torch.cuda.FloatTensor(1, ) 47 | self.y2 = nn.Parameter(y2) 48 | nn.init.constant_(self.y2, 0.01) 49 | 50 | # normalization along the temporal dimensione 51 | T2 = torch.ones([self.t1, 1]) 52 | x2 = torch.mean(x, dim=2) 53 | x2 = torch.reshape(x2, (x2.shape[0], x2.shape[1], 1)) 54 | 55 | std = torch.std(x, dim=2) 56 | std = torch.reshape(std, (std.shape[0], std.shape[1], 1)) 57 | # it can be possible that the std of some temporal slices is 0, and this produces inf values, so we have to set them to one 58 | std[std < 1e-4] = 1 59 | 60 | diff = x - (x2 @ (T2.T)) 61 | Z2 = diff / (std @ (T2.T)) 62 | 63 | X2 = self.l2 @ T2.T 64 | X2 = X2 * Z2 65 | X2 = X2 + (self.B2 @ T2.T) 66 | 67 | # normalization along the feature dimension 68 | T1 = torch.ones([self.d1, 1]) 69 | x1 = torch.mean(x, dim=1) 70 | x1 = torch.reshape(x1, (x1.shape[0], x1.shape[1], 1)) 71 | 72 | std = torch.std(x, dim=1) 73 | std = torch.reshape(std, (std.shape[0], std.shape[1], 1)) 74 | 75 | op1 = x1 @ T1.T 76 | op1 = torch.permute(op1, (0, 2, 1)) 77 | 78 | op2 = std @ T1.T 79 | op2 = torch.permute(op2, (0, 2, 1)) 80 | 81 | z1 = (x - op1) / (op2) 82 | X1 = (T1 @ self.l1.T) 83 | X1 = X1 * z1 84 | X1 = X1 + (T1 @ self.B1.T) 85 | 86 | # weighing the imporance of temporal and feature normalization 87 | x = self.y1 * X1 + self.y2 * X2 88 | 89 | return x 90 | 91 | 92 | class TABL_layer(nn.Module): 93 | def 
__init__(self, d2, d1, t1, t2): 94 | super().__init__() 95 | self.t1 = t1 96 | 97 | weight = torch.Tensor(d2, d1) 98 | self.W1 = nn.Parameter(weight) 99 | nn.init.kaiming_uniform_(self.W1, nonlinearity='relu') 100 | 101 | weight2 = torch.Tensor(t1, t1) 102 | self.W = nn.Parameter(weight2) 103 | nn.init.constant_(self.W, 1 / t1) 104 | 105 | weight3 = torch.Tensor(t1, t2) 106 | self.W2 = nn.Parameter(weight3) 107 | nn.init.kaiming_uniform_(self.W2, nonlinearity='relu') 108 | 109 | bias1 = torch.Tensor(d2, t2) 110 | self.B = nn.Parameter(bias1) 111 | nn.init.constant_(self.B, 0) 112 | 113 | l = torch.Tensor(1, ) 114 | self.l = nn.Parameter(l) 115 | nn.init.constant_(self.l, 0.5) 116 | 117 | self.activation = nn.ReLU() 118 | 119 | def forward(self, X): 120 | 121 | # maintaining the weight parameter between 0 and 1. 122 | if (self.l[0] < 0): 123 | l = torch.Tensor(1, ) 124 | self.l = nn.Parameter(l) 125 | nn.init.constant_(self.l, 0.0) 126 | 127 | if (self.l[0] > 1): 128 | l = torch.Tensor(1, ) 129 | self.l = nn.Parameter(l) 130 | nn.init.constant_(self.l, 1.0) 131 | 132 | # modelling the dependence along the first mode of X while keeping the temporal order intact (7) 133 | X = self.W1 @ X 134 | 135 | # enforcing constant (1) on the diagonal 136 | W = self.W - self.W * torch.eye(self.t1, dtype=torch.float32) + torch.eye(self.t1, dtype=torch.float32) / self.t1 137 | 138 | # attention, the aim of the second step is to learn how important the temporal instances are to each other (8) 139 | E = X @ W 140 | 141 | # computing the attention mask (9) 142 | A = torch.softmax(E, dim=-1) 143 | 144 | # applying a soft attention mechanism (10) 145 | # he attention mask A obtained from the third step is used to zero out the effect of unimportant elements 146 | X = self.l[0] * (X) + (1.0 - self.l[0]) * X * A 147 | 148 | # the final step of the proposed layer estimates the temporal mapping W2, after the bias shift (11) 149 | y = X @ self.W2 + self.B 150 | return y 151 | 152 | 153 | class BL_layer(nn.Module): 154 | def __init__(self, d2, d1, t1, t2): 155 | super().__init__() 156 | weight1 = torch.Tensor(d2, d1) 157 | self.W1 = nn.Parameter(weight1) 158 | nn.init.kaiming_uniform_(self.W1, nonlinearity='relu') 159 | 160 | weight2 = torch.Tensor(t1, t2) 161 | self.W2 = nn.Parameter(weight2) 162 | nn.init.kaiming_uniform_(self.W2, nonlinearity='relu') 163 | 164 | bias1 = torch.zeros((d2, t2)) 165 | self.B = nn.Parameter(bias1) 166 | nn.init.constant_(self.B, 0) 167 | 168 | self.activation = nn.ReLU() 169 | 170 | def forward(self, x): 171 | 172 | x = self.activation(self.W1 @ x @ self.W2 + self.B) 173 | 174 | return x 175 | -------------------------------------------------------------------------------- /src/data_preprocessing/FI/FIDataBuilder.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import os.path 3 | from collections import Counter 4 | 5 | import src.constants as cst 6 | import numpy as np 7 | import tqdm 8 | import torch 9 | from pprint import pprint 10 | from torch.utils import data 11 | 12 | 13 | class FIDataset(data.Dataset): 14 | def __init__( 15 | self, 16 | dataset_path, 17 | dataset_type, 18 | horizon, 19 | observation_length, 20 | train_val_split, 21 | n_trends, 22 | auction=False, 23 | normalization_type=cst.NormalizationType.Z_SCORE, 24 | ): 25 | assert horizon in [1, 2, 3, 5, 10] 26 | 27 | self.fi_data_dir = dataset_path 28 | self.dataset_type = dataset_type 29 | self.train_val_split = train_val_split 30 | self.auction = auction 31 | 
self.normalization_type = normalization_type 32 | self.horizon = horizon 33 | self.observation_length = observation_length 34 | self.num_classes = n_trends 35 | 36 | # KEY call, generates the dataset 37 | self.data, self.samples_X, self.samples_y = None, None, None 38 | self.__prepare_dataset() 39 | 40 | _, occs = self.__class_balancing(self.samples_y) 41 | # LOSS_WEIGHTS_DICT = {m: 1e6 for m in cst.Models} 42 | LOSS_WEIGHT = 1e6 43 | self.loss_weights = torch.Tensor(LOSS_WEIGHT / occs) 44 | 45 | self.samples_X = torch.from_numpy(self.samples_X).type(torch.FloatTensor) # torch.Size([203800, 40]) 46 | self.samples_y = torch.from_numpy(self.samples_y).type(torch.LongTensor) # torch.Size([203800]) 47 | self.x_shape = (self.observation_length, self.samples_X.shape[1]) # shape of a single sample 48 | 49 | def __len__(self): 50 | """ Denotes the total number of samples. """ 51 | return self.samples_X.shape[0] - self.observation_length 52 | 53 | def __getitem__(self, index): 54 | """ Generates samples of data. """ 55 | sample = self.samples_X[index: index + self.observation_length], self.samples_y[index + self.observation_length - 1] 56 | return sample 57 | 58 | @staticmethod 59 | def __class_balancing(y): 60 | ys_occurrences = collections.Counter(y) 61 | occs = np.array([ys_occurrences[k] for k in sorted(ys_occurrences)]) 62 | return ys_occurrences, occs 63 | 64 | def __parse_dataset(self): 65 | """ Reads the dataset from the FI files. """ 66 | 67 | AUCTION = 'Auction' if self.auction else 'NoAuction' 68 | N = '1.' if self.normalization_type == cst.NormalizationType.Z_SCORE else '2.' if self.normalization_type == cst.NormalizationType.MINMAX else '3.' 69 | NORMALIZATION = 'Zscore' if self.normalization_type == cst.NormalizationType.Z_SCORE else 'MinMax' if self.normalization_type == cst.NormalizationType.MINMAX else 'DecPre' 70 | DATASET_TYPE = 'Training' if self.dataset_type == cst.DatasetType.TRAIN or self.dataset_type == cst.DatasetType.VALIDATION else 'Testing' 71 | DIR = self.fi_data_dir + \ 72 | "/{}".format(AUCTION) + \ 73 | "/{}{}_{}".format(N, AUCTION, NORMALIZATION) + \ 74 | "/{}_{}_{}".format(AUCTION, NORMALIZATION, DATASET_TYPE) 75 | 76 | NORMALIZATION = 'ZScore' if self.normalization_type == cst.NormalizationType.Z_SCORE else 'MinMax' if self.normalization_type == cst.NormalizationType.MINMAX else 'DecPre' 77 | DATASET_TYPE = 'Train' if self.dataset_type == cst.DatasetType.TRAIN or self.dataset_type == cst.DatasetType.VALIDATION else 'Test' 78 | 79 | F_EXTENSION = '.txt' 80 | 81 | # if it is training time, we open the 7-days training file 82 | # if it is testing time, we open the 3 test files 83 | if self.dataset_type == cst.DatasetType.TRAIN or self.dataset_type == cst.DatasetType.VALIDATION: 84 | 85 | F_NAME = DIR + '/{}_Dst_{}_{}_CF_7'.format(DATASET_TYPE, AUCTION, NORMALIZATION) + F_EXTENSION 86 | 87 | if not os.path.exists(F_NAME): 88 | error = "\n\nFile {} not found! Make sure to follow the following steps.".format(F_NAME) 89 | error += "\n\n (1) Download the dataset in data/datasets, by running:\n{}".format(cst.DOWNLOAD_FI_COMMAND) 90 | error += "\n (2) Unzip the file." 
91 | error += "\n (3) Run: mv data/datasets/published/ data/datasets/FI-2010" 92 | error += "\n (4) Unzip data/datasets/FI-2010/BenchmarkDatasets/BenchmarkDatasets.zip in data/datasets/FI-2010/BenchmarkDatasets" 93 | error += "\n" 94 | raise FileNotFoundError(error) 95 | 96 | out_df = np.loadtxt(F_NAME) 97 | 98 | n_samples_train = int(np.floor(out_df.shape[1] * self.train_val_split)) 99 | if self.dataset_type == cst.DatasetType.TRAIN: 100 | out_df = out_df[:, :n_samples_train] 101 | 102 | elif self.dataset_type == cst.DatasetType.VALIDATION: 103 | out_df = out_df[:, n_samples_train:] 104 | 105 | else: 106 | F_NAMES = [DIR + '/{}_Dst_{}_{}_CF_{}'.format(DATASET_TYPE, AUCTION, NORMALIZATION, i) + F_EXTENSION for i in range(7, 10)] 107 | out_df = np.hstack([np.loadtxt(F_NAME) for F_NAME in F_NAMES]) 108 | 109 | self.data = out_df 110 | 111 | def __prepare_X(self): 112 | """ we only consider the first 40 features, i.e. the 10 levels of the LOB""" 113 | LOB_TEN_LEVEL_FEATURES = 40 114 | self.samples_X = self.data[:LOB_TEN_LEVEL_FEATURES, :].transpose() 115 | 116 | def __prepare_y(self): 117 | """ gets the labels """ 118 | # the last five elements in self.data contain the labels 119 | # they are based on the possible horizon values [1, 2, 3, 5, 10] 120 | self.samples_y = self.data[cst.HORIZONS_MAPPINGS_FI[self.horizon], :] 121 | self.samples_y -= 1 122 | 123 | def __prepare_dataset(self): 124 | """ Crucial call! """ 125 | 126 | self.__parse_dataset() 127 | 128 | self.__prepare_X() 129 | self.__prepare_y() 130 | 131 | print("Dataset type:", self.dataset_type, " - normalization:", self.normalization_type) 132 | occs, occs_vec = self.__class_balancing(self.samples_y) 133 | 134 | perc = ["{}%".format(round(i, 2)) for i in (occs_vec / np.sum(occs_vec)) * 100] 135 | print("Balancing", occs, "=>", perc) 136 | print() 137 | -------------------------------------------------------------------------------- /src/lobcast.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import argparse 4 | import os 5 | from datetime import datetime 6 | from enum import Enum 7 | from matplotlib.backends.backend_pdf import PdfPages 8 | 9 | import numpy as np 10 | from pytorch_lightning import seed_everything 11 | 12 | import src.constants as cst 13 | from src.metrics.metrics_log import Metrics 14 | 15 | np.set_printoptions(suppress=True) 16 | from src.utils.utils_generic import str_to_bool 17 | from src.settings import Settings 18 | from src.hyper_parameters import HPTunable, HPTuned 19 | 20 | from src.models.model_callbacks import callback_save_model 21 | from src.data_preprocessing.utils_dataset import pick_dataset 22 | from src.models.utils_models import pick_model 23 | from pytorch_lightning import Trainer 24 | from src.metrics.report import plot_metric_training, plot_metric_best, saved_metrics 25 | from src.utils.utils_generic import get_class_arguments 26 | 27 | 28 | class LOBCAST: 29 | """ LOBCAST class is responsible to maintain all the information about the current simulation. 30 | Including the simulation settings, tunable hyperparameters of the models. """ 31 | 32 | def __init__(self): 33 | 34 | self.SETTINGS = Settings() # the settings of the simulation 35 | self.HP_TUNABLE = HPTunable() # the hyperparameters to vary and their domains 36 | self.HP_TUNED = HPTuned() # the hyperparameters and their values 37 | 38 | def update_settings(self, setting_params: dict): 39 | """ Updates the settings with the given parameters. 
""" 40 | # settings new settings 41 | for key, value in setting_params.items(): 42 | self.SETTINGS.__setattr__(key, value) 43 | 44 | self.SETTINGS.check_parameters_validity() 45 | self.__init_hyper_parameters() 46 | 47 | if self.SETTINGS.IS_SANITY_CHECK: 48 | self.__sanity_check_settings() 49 | 50 | # at this point parameters are set 51 | print("\nRunning with settings:\n", self.SETTINGS.__dict__) 52 | 53 | def __sanity_check_settings(self): 54 | print("THIS IS A SANITY CHECK RUN.") 55 | self.SETTINGS.EPOCHS_UB = 1 56 | 57 | def update_hyper_parameters(self, tuning_parameters: dict): 58 | """Update the hyperparameters with the given parameters""" 59 | 60 | # coming from wandb or from local grid search 61 | for key, value in tuning_parameters.items(): 62 | self.HP_TUNED.update_hyperparameter(key, value) 63 | 64 | # at this point parameters are set 65 | print("\nRunning with hyper parameters:\n", self.HP_TUNED.__dict__) 66 | 67 | def end_setup(self, wandb_instance=None): 68 | """ Ends the simulation setup based on the chosen settings and parameters. """ 69 | 70 | self.DATE_TIME = datetime.now().strftime("%d-%m-%Y_%H-%M-%S") 71 | dir_detail = "SANITY_CHECK" if self.SETTINGS.IS_SANITY_CHECK else self.DATE_TIME 72 | self.SETTINGS.DIR_EXPERIMENTS = f"{cst.DIR_EXPERIMENTS}-({dir_detail})/" 73 | 74 | self.__seed_everything(self.SETTINGS.SEED) 75 | self.__setup_all_directories(self.SETTINGS) 76 | 77 | self.METRICS = Metrics(self.SETTINGS.DIR_EXPERIMENTS, self.sim_name_format()) 78 | self.METRICS.dump_info(self.SETTINGS.__dict__, self.HP_TUNED.__dict__) 79 | 80 | self.WANDB_INSTANCE = wandb_instance 81 | 82 | def __init_hyper_parameters(self): 83 | """ Init the simulation hyperparameters gathering those from the chosen model, declared by the user. """ 84 | model_arguments = get_class_arguments(self.SETTINGS.PREDICTION_MODEL.value.model)[2:] 85 | model_tunable = self.SETTINGS.PREDICTION_MODEL.value.tunable_parameters 86 | 87 | # checks that HP are meaningful 88 | for param, values in model_tunable.__dict__.items(): 89 | if not (param in model_arguments or param in self.HP_TUNABLE.__dict__): 90 | raise KeyError(f"The declared hyper parameters \'{param}\' of model {self.SETTINGS.PREDICTION_MODEL.name} is never used. Remove it.") 91 | 92 | self.HP_TUNABLE = model_tunable 93 | 94 | # set to default, add the same parameters in the HP_TUNED object 95 | for key, _ in self.HP_TUNABLE.__dict__.items(): 96 | self.HP_TUNED.add_hyperparameter(key, None) 97 | 98 | def __seed_everything(self, seed): 99 | """ Sets the random seed of the whole simulator. """ 100 | seed_everything(seed) 101 | 102 | def sim_name_format(self): 103 | """ The name of the simulation. """ 104 | SIM_NAME = "MOD={}-SEED={}-DS={}-HU={}-HP={}-HF={}-OB={}" 105 | return SIM_NAME.format( 106 | self.SETTINGS.PREDICTION_MODEL.name, 107 | self.SETTINGS.SEED, 108 | self.SETTINGS.DATASET_NAME.value, 109 | self.SETTINGS.PREDICTION_HORIZON_UNIT.name, 110 | self.SETTINGS.PREDICTION_HORIZON_PAST, 111 | self.SETTINGS.PREDICTION_HORIZON_FUTURE, 112 | self.SETTINGS.OBSERVATION_PERIOD, 113 | ) 114 | 115 | def parse_cl_arguments(self): 116 | """ Parses the arguments for the command line. 
""" 117 | parser = argparse.ArgumentParser(description='LOBCAST arguments:') 118 | 119 | # every field in the settings, can be set crom cl 120 | for k, v in self.SETTINGS.__dict__.items(): 121 | var = v.name if isinstance(v, Enum) else v 122 | type_var = str if isinstance(v, Enum) else type(v) 123 | type_var = str_to_bool if type(v) == bool else type_var # to parse bool 124 | parser.add_argument(f'--{k}', default=var, type=type_var) 125 | 126 | args = vars(parser.parse_args()) 127 | 128 | print("Gathering CLI values.") 129 | setting_conf = dict() 130 | # every field in the settings, is set based on the parsed values, enums are parsed by NAME 131 | for k, v in self.SETTINGS.__dict__.items(): 132 | value = v.__class__[args[k]] if isinstance(v, Enum) else args[k] 133 | setting_conf[k] = value 134 | 135 | return setting_conf 136 | 137 | @staticmethod 138 | def __setup_all_directories(settings): 139 | """ Creates the necessary directories for the simulation. """ 140 | # create the paths for the simulation if they do not exist already 141 | paths = ["data", "data/datasets", "data/experiments", settings.DIR_EXPERIMENTS] 142 | for p in paths: 143 | if not os.path.exists(p): 144 | os.makedirs(p) 145 | 146 | def run(self): 147 | """ After having chosen settings and hyperparams, it runs LOBCAST training loop. """ 148 | 149 | data_module = pick_dataset(self) 150 | nets_module = pick_model(self, data_module) 151 | 152 | trainer = Trainer( 153 | accelerator=self.SETTINGS.DEVICE, 154 | devices=self.SETTINGS.N_GPUs, 155 | check_val_every_n_epoch=self.SETTINGS.VALIDATION_EVERY, 156 | max_epochs=self.SETTINGS.EPOCHS_UB, 157 | num_sanity_val_steps=1 if self.SETTINGS.IS_SANITY_CHECK else 0, 158 | callbacks=[ 159 | callback_save_model(self.SETTINGS.DIR_EXPERIMENTS, self.sim_name_format(), cst.VALIDATION_METRIC, top_k=3) 160 | ], 161 | ) 162 | 163 | model_path = self.SETTINGS.TEST_MODEL_PATH if self.SETTINGS.IS_TEST_ONLY else "best" 164 | 165 | if not self.SETTINGS.IS_TEST_ONLY: 166 | trainer.fit(nets_module, data_module) 167 | self.METRICS.reset_stats() 168 | 169 | # this flag is used when running simulation to know if final validation on best model is running 170 | self.METRICS.is_best_model = True 171 | 172 | # best model evaluation starts 173 | trainer.validate(nets_module, data_module, ckpt_path=model_path) 174 | trainer.test(nets_module, data_module, ckpt_path=model_path) 175 | 176 | def evaluate(self): 177 | fnames_root = self.SETTINGS.DIR_EXPERIMENTS + self.sim_name_format() 178 | pdf_best = PdfPages(fnames_root + "_" + 'metrics_best_plots.pdf') 179 | pdf_running = PdfPages(fnames_root + "_" + 'metrics_train_plots.pdf') 180 | 181 | for m in saved_metrics: 182 | plot_metric_best(fnames_root + "_" + cst.METRICS_BEST_FILE_NAME, m, pdf_best) 183 | plot_metric_training(fnames_root + "_" + cst.METRICS_RUNNING_FILE_NAME, m, pdf_running) 184 | 185 | pdf_best.close() 186 | pdf_running.close() 187 | 188 | def close(self): 189 | print('Completed.') 190 | if self.SETTINGS.IS_SANITY_CHECK: 191 | exit("Sanity check passed.") 192 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LOBCAST — Stock Price Trend Forecasting with Python 2 | 3 | ## 📈 LOBCAST 4 | LOBCAST is a Python-based open-source framework developed for stock market trend forecasting using Limit Order Book (LOB) 5 | data. 
The framework enables users to test deep learning models for the task of Stock Price Trend Prediction (SPTP).
6 | It serves as the official repository for the paper titled __LOB-Based Deep Learning Models for Stock Price Trend Prediction:
7 | A Benchmark Study__ [[paper](https://link.springer.com/article/10.1007/s10462-024-10715-4)].
8 | 
9 | The paper formalizes the SPTP task and the structure of LOB data.
10 | The following sections explain how to download LOB data, how to run stock predictions with LOBCAST using your own DL model,
11 | and how to evaluate and compare models.
12 | 
13 | #### About mini-LOBCAST
14 | This main branch represents a newer version of LOBCAST named mini-LOBCAST. It enables benchmarking models on the standard
15 | LOB dataset used in the literature, specifically FI-2010 [[dataset](https://etsin.fairdata.fi/dataset/73eb48d7-4dbc-4a10-a52a-da745b47a649)].
16 | This version will be expanded to include more datasets, with procedures for handling data consistently for benchmarking.
17 | These procedures are already available in the branch v0-LOBCAST, which will be integrated soon. We encourage the use of this version,
18 | while also recommending a glance at the other branch for additional implemented models and functions.
19 | 
20 | ## Installing LOBCAST
21 | 
22 | You can install LOBCAST by cloning the repository and navigating into the directory:
23 | 
24 | ```
25 | git clone https://github.com/matteoprata/LOBCAST.git
26 | cd LOBCAST
27 | ```
28 | 
29 | Install all the required dependencies:
30 | ```
31 | pip install -r requirements.txt
32 | ```
33 | ### Downloading LOB Dataset
34 | To download the FI-2010 Dataset [[dataset](https://etsin.fairdata.fi/dataset/73eb48d7-4dbc-4a10-a52a-da745b47a649)], follow these instructions:
35 | 
36 | 1. Download the dataset into `data/datasets` by running:
37 | ```
38 | mkdir data/datasets
39 | cd data/datasets
40 | wget --content-disposition "https://download.fairdata.fi:443/download?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MTU1MDU5OTksImRhdGFzZXQiOiI3M2ViNDhkNy00ZGJjLTRhMTAtYTUyYS1kYTc0NWI0N2E2NDkiLCJwYWNrYWdlIjoiNzNlYjQ4ZDctNGRiYy00YTEwLWE1MmEtZGE3NDViNDdhNjQ5X2JoeXV4aWZqLnppcCIsImdlbmVyYXRlZF9ieSI6IjlmZGRmZmVlLWY4ZDItNDZkNS1hZmIwLWQyOTM0NzdlZjg2ZiIsInJhbmRvbV9zYWx0IjoiYjg1ZjNhM2YifQ.NOT94HPMUdwpi6lFsmnRhkToP2FAdmbmoEkhlRNBQGM"
41 | ```
42 | 2. Unzip the file.
43 | 3. Run:
44 | ```
45 | mv data/datasets/published data/datasets/FI-2010
46 | ```
47 | 4. Unzip `data/datasets/FI-2010/BenchmarkDatasets/BenchmarkDatasets.zip` into `data/datasets/FI-2010/BenchmarkDatasets`.
48 | 
49 | Ensure that this path exists to execute LOBCAST on this dataset:
50 | ```
51 | data/datasets/FI-2010/BenchmarkDatasets/NoAuction/1.NoAuction_Zscore/NoAuction_Zscore_Training/*
52 | ```
53 | 
54 | ### Running
55 | Run LOBCAST locally with an MLP model and the FI-2010 dataset, using the default settings in `src.settings`:
56 | ```
57 | python -m src.run
58 | ```
59 | 
60 | To customize parameters:
61 | ```
62 | python -m src.run --SEED 42 --PREDICTION_MODEL BINCTABL --OBSERVATION_PERIOD 10 --EPOCHS_UB 20 --IS_WANDB 0
63 | ```
64 | This will execute LOBCAST with seed 42 on the FI-2010 dataset, using the BINCTABL model, with an observation period of 10 events, for 20 epochs, running locally (not on WANDB).
65 | 
66 | The `run.py` file allows adjusting the following arguments, which are all attributes of the class `src.settings.Settings`.
67 | ```
68 | LOBCAST
69 | optional arguments:
70 | -h, --help show this help message and exit
71 | --SEED
72 | --DATASET_NAME
73 | --N_TRENDS
74 | --PREDICTION_MODEL
75 | --PREDICTION_HORIZON_UNIT
76 | --PREDICTION_HORIZON_FUTURE
77 | --PREDICTION_HORIZON_PAST
78 | --OBSERVATION_PERIOD
79 | --IS_SHUFFLE_TRAIN_SET
80 | --EPOCHS_UB
81 | --TRAIN_SET_PORTION
82 | --VALIDATION_EVERY
83 | --IS_TEST_ONLY
84 | --TEST_MODEL_PATH
85 | --DEVICE
86 | --N_GPUs
87 | --N_CPUs
88 | --DIR_EXPERIMENTS
89 | --IS_WANDB
90 | --WANDB_SWEEP_METHOD
91 | --IS_SANITY_CHECK
92 | ```
93 | At the end of the execution, JSON files containing all the statistics of the simulation and a PDF showing the performance
94 | of the model will be created at `data/experiments`.
95 | 
96 | ### Settings
97 | To set up a simulation in terms of randomness, choice of dataset, choice of model, observation frame of the models, and
98 | whether to log the metrics locally or on WANDB, LOBCAST allows setting all these parameters by accessing the
99 | `LOBCAST().SETTINGS` object. These parameters are set at the beginning of the simulation and overwritten by the arguments
100 | passed from the command-line interface (CLI).
101 | 
102 | ### Hyperparameters
103 | To find the right learning parameters of the model, hyperparameters can be specified in `src.hyper_parameters.HPTunable`.
104 | By default, it contains the batch size, learning rate, and optimizer, but it can be extended by the user to specify other
105 | parameters. Keep in this class all the hyperparameters common to all the models; model-specific parameters are added as
106 | shown in the _Adding a New Model_ section below.
107 | 
108 | You can specify either the exact values a parameter can take, as ```{'values': [1, 2, 3]}```, or a min-max range, as
109 | ```{'min': 1, 'max': 100}```.
110 | 
111 | 
112 | ### LOBCAST logic
113 | The logic of the simulator can be summarized as follows:
114 | 
115 | 1. Initialize LOBCAST.
116 | 2. Parse settings from the CLI.
117 | 3. Update settings.
118 | 4. Choose hyperparameter configurations.
119 | 5. Run the simulation, including data gathering, model selection, and training loop.
120 | 6. Generate a PDF with evaluation metrics.
121 | 7. Close the simulation.
122 | 
123 | The code below shows the simulation logic in `src.run`:
124 | ```
125 | sim = LOBCAST()
126 | 
127 | setting_conf = sim.parse_cl_arguments()  # parse settings from the CLI
128 | sim.update_settings(setting_conf)        # update the simulation settings
129 | 
130 | hparams_config = grid_search_configurations(sim.HP_TUNABLE.__dict__)[0]  # the first configuration (a dict) of the hyperparameter grid search
131 | sim.update_hyper_parameters(hparams_config)  # update the simulation hyperparameters
132 | sim.end_setup()
133 | 
134 | sim.run()       # run the simulation: data gathering, model selection and training loop
135 | sim.evaluate()  # generate a PDF with the evaluation metrics
136 | sim.close()
137 | ```
138 | 
139 | 
140 | ### Experimental Plans (_optional_)
141 | Running multiple experiments sequentially is facilitated by instantiating an execution plan. As an alternative to `src.run`,
142 | sequential tests can be run from `src.run_batch`.
143 | 
144 | ```
145 | ep = ExecutionPlan(setup01.INDEPENDENT_VARIABLES,
146 |                    setup01.INDEPENDENT_VARIABLES_CONSTRAINTS)
147 | 
148 | setting_confs = ep.configurations()
149 | ```
150 | 
151 | An execution plan is defined in terms of `INDEPENDENT_VARIABLES` and `INDEPENDENT_VARIABLES_CONSTRAINTS`.
152 | These are two dictionaries. The first dictionary represents the variables to vary in a grid search.
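For instance, a batch setup module might contain the two dictionaries sketched below. The values here are purely
illustrative; the keys are members of `src.settings.SettingsExp`, and models are referenced through `src.constants`:

```
from src.settings import SettingsExp
import src.constants as cst

# variables to vary: the execution plan explores their cartesian product
INDEPENDENT_VARIABLES = {
    SettingsExp.SEED: [0, 1],
    SettingsExp.PREDICTION_MODEL: [cst.Models.MLP, cst.Models.BINCTABL],
    SettingsExp.OBSERVATION_PERIOD: [10, 100],
}

# value assigned to a variable whenever it is not the one being varied
INDEPENDENT_VARIABLES_CONSTRAINTS = {
    SettingsExp.OBSERVATION_PERIOD: 100,
}
```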
153 | The `INDEPENDENT_VARIABLES_CONSTRAINTS` dictionary allows defining how a variable should be set when it does not vary,
154 | thus pruning the grid search and eliminating certain configurations. The `setting_confs` contain
155 | the configurations to be passed, iteratively, to `sim.update_settings(setting_conf)`.
156 | 
157 | To run an execution plan with the dictionaries defined in `src.batch_experiments.setup01`, execute:
158 | ```
159 | python -m src.run_batch
160 | ```
161 | 
162 | Procedures for gathering the performance of different models and generating comprehensive benchmarking plots will be added in a future update.
163 | 
164 | ### Adding a New Model
165 | To integrate a new model into LOBCAST, follow these steps:
166 | 
167 | 1. Create the model file: Add a `.py` file in the `src.models` directory. Define your new model class, inheriting from
168 | `src.models.lobcast_model.LOBCAST_model`:
169 | 
170 | ```
171 | class MyNewModel(LOBCAST_model):
172 |     def __init__(self, input_dim, output_dim, param1, param2, param3):
173 |         super().__init__(input_dim, output_dim)
174 |         ...
175 | ```
176 | 
177 | 2. Define hyperparameters: Optionally define the domains of your model parameters by creating a class that inherits from
178 | `src.hyper_parameters.HPTunable`:
179 | 
180 | ```
181 | class HP(HPTunable):
182 |     def __init__(self):
183 |         super().__init__()
184 |         self.param1 = {"values": [16, 32, 64]}
185 |         self.param2 = {"values": [.1, .5]}
186 | ```
187 | 3. Declare the LOBCAST module: Instantiate a `src.models.lobcast_model.LOBCAST_module` to encapsulate the model and its hyperparameters:
188 | ```
189 | mynewmodel_lm = LOBCAST_module(MyNewModel, HP())
190 | ```
191 | 
192 | 4. Declare the model in the Models enumerator: Add your model to the `src.models.models_classes.Models` enumerator:
193 | 
194 | ```
195 | class Models(Enum):
196 |     NEW_MODEL = mynewmodel_lm
197 | ```
198 | 
199 | Now, you can execute the new model using the command:
200 | ```
201 | python -m src.run --SEED 42 --PREDICTION_MODEL NEW_MODEL --IS_WANDB 0
202 | ```
203 | Any undeclared settings will be assigned default values.
204 | 
205 | 
206 | Optionally, enforce constraints on the model settings using `src.settings.Settings.check_parameters_validity`. For example:
207 | 
208 | ```
209 | constraint = (not self.PREDICTION_MODEL == cst.Models.NEW_MODEL or self.OBSERVATION_PERIOD == 10,
210 |               f"At the moment, NEW_MODEL only allows OBSERVATION_PERIOD = 10, {self.OBSERVATION_PERIOD} given.")
211 | ```
212 | Be sure to add this constraint to `CONSTRAINTS` in `src.settings.Settings.check_parameters_validity` so that it is enforced.
213 | 
214 | ### References
215 | Prata, Matteo, et al. __"LOB-based deep learning models for stock price trend prediction: a benchmark study."__ Artificial Intelligence Review 57.5 (2024): 1-45.
216 | 
217 | > _The recent advancements in Deep Learning (DL) research have notably influenced the finance sector. We examine the
218 | > robustness and generalizability of fifteen state-of-the-art DL models focusing on Stock Price Trend Prediction (SPTP)
219 | > based on Limit Order Book (LOB) data. To carry out this study, we developed LOBCAST, an open-source framework that
220 | > incorporates data preprocessing, DL model training, evaluation and profit analysis. Our extensive experiments reveal
221 | > that all models exhibit a significant performance drop when exposed to new data, thereby raising questions about their
222 | > real-world market applicability.
Our work serves as a benchmark, illuminating the potential and the limitations of current 223 | > approaches and providing insight for innovative solutions._ 224 | 225 | Link: https://link.springer.com/article/10.1007/s10462-024-10715-4 226 | 227 | 228 | ### Acknowledgments 229 | LOBCAST was developed by [Matteo Prata](https://github.com/matteoprata), [Giuseppe Masi](https://github.com/giuseppemasi99), [Leonardo Berti](https://github.com/LeonardoBerti00), [Andrea Coletta](https://github.com/Andrea94c), [Irene Cannistraci](https://github.com/icannistraci), Viviana Arrigoni. 230 | --------------------------------------------------------------------------------