├── xgboost_model.pkl
├── __pycache__
    ├── viz.cpython-312.pyc
    ├── utils.cpython-312.pyc
    ├── config.cpython-312.pyc
    ├── models.cpython-312.pyc
    ├── evaluation.cpython-312.pyc
    ├── advanced_viz.cpython-312.pyc
    ├── data_loader.cpython-312.pyc
    ├── advanced_models.cpython-312.pyc
    └── advanced_evaluation.cpython-312.pyc
├── requirements.txt
├── evaluation.py
├── viz.py
├── data_loader.py
├── phyton_project (1).py
├── config.py
├── TODO.md
├── models.py
├── utils.py
├── advanced_models.py
├── advanced_evaluation.py
├── advanced_viz.py
├── README.md
├── app.py
└── app_advanced.py


/xgboost_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Madhuarvind/Time-Series-Forecasting-Tool/HEAD/xgboost_model.pkl


--------------------------------------------------------------------------------
/__pycache__/viz.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Madhuarvind/Time-Series-Forecasting-Tool/HEAD/__pycache__/viz.cpython-312.pyc


--------------------------------------------------------------------------------
/__pycache__/utils.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Madhuarvind/Time-Series-Forecasting-Tool/HEAD/__pycache__/utils.cpython-312.pyc


--------------------------------------------------------------------------------
/__pycache__/config.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Madhuarvind/Time-Series-Forecasting-Tool/HEAD/__pycache__/config.cpython-312.pyc


--------------------------------------------------------------------------------
/__pycache__/models.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Madhuarvind/Time-Series-Forecasting-Tool/HEAD/__pycache__/models.cpython-312.pyc


--------------------------------------------------------------------------------
/__pycache__/evaluation.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Madhuarvind/Time-Series-Forecasting-Tool/HEAD/__pycache__/evaluation.cpython-312.pyc


--------------------------------------------------------------------------------
/__pycache__/advanced_viz.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Madhuarvind/Time-Series-Forecasting-Tool/HEAD/__pycache__/advanced_viz.cpython-312.pyc


--------------------------------------------------------------------------------
/__pycache__/data_loader.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Madhuarvind/Time-Series-Forecasting-Tool/HEAD/__pycache__/data_loader.cpython-312.pyc


--------------------------------------------------------------------------------
/__pycache__/advanced_models.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Madhuarvind/Time-Series-Forecasting-Tool/HEAD/__pycache__/advanced_models.cpython-312.pyc


--------------------------------------------------------------------------------
/__pycache__/advanced_evaluation.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Madhuarvind/Time-Series-Forecasting-Tool/HEAD/__pycache__/advanced_evaluation.cpython-312.pyc


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | streamlit
 2 | tensorflow==2.16.1
 3 | scikit-learn
 4 | pandas
 5 | numpy
 6 | matplotlib
 7 | seaborn
 8 | xgboost
 9 | plotly
10 | statsmodels
11 | scipy
12 | joblib
13 | 


--------------------------------------------------------------------------------
/evaluation.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 3 | 
 4 | def mean_absolute_percentage_error(y_true, y_pred):
 5 |     """
 6 |     Calculate Mean Absolute Percentage Error (MAPE).
 7 |     """
 8 |     y_true, y_pred = np.array(y_true), np.array(y_pred)
 9 |     return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
10 | 
11 | def evaluate_model(y_true, y_pred):
12 |     """
13 |     Evaluate model with MAE, RMSE, MAPE, and R2 Score.
14 |     """
15 |     mae = mean_absolute_error(y_true, y_pred)
16 |     rmse = np.sqrt(mean_squared_error(y_true, y_pred))
17 |     mape = mean_absolute_percentage_error(y_true, y_pred)
18 |     r2 = r2_score(y_true, y_pred)
19 |     return {'MAE': mae, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}
20 | 


--------------------------------------------------------------------------------
/viz.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import seaborn as sns
 3 | import pandas as pd
 4 | 
 5 | def plot_forecast(y_true, y_pred, title='Forecast vs Actual'):
 6 |     """
 7 |     Plot observed vs predicted values.
 8 |     """
 9 |     plt.figure(figsize=(10, 6))
10 |     plt.plot(y_true.index, y_true, label='Observed', color='blue')
11 |     plt.plot(y_true.index, y_pred, label='Predicted', color='red')
12 |     plt.xlabel('Date')
13 |     plt.ylabel('Value')
14 |     plt.title(title)
15 |     plt.legend()
16 |     plt.show()
17 | 
18 | def plot_residuals(y_true, y_pred):
19 |     """
20 |     Plot residuals.
21 |     """
22 |     residuals = y_true - y_pred
23 |     plt.figure(figsize=(10, 6))
24 |     plt.plot(residuals.index, residuals, color='green')
25 |     plt.axhline(0, color='black', linestyle='--')
26 |     plt.xlabel('Date')
27 |     plt.ylabel('Residuals')
28 |     plt.title('Residuals Plot')
29 |     plt.show()
30 | 
31 | def plot_distribution(residuals):
32 |     """
33 |     Plot distribution of residuals.
34 |     """
35 |     plt.figure(figsize=(8, 6))
36 |     sns.histplot(residuals, kde=True)
37 |     plt.title('Residuals Distribution')
38 |     plt.show()
39 | 


--------------------------------------------------------------------------------
/data_loader.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from sklearn.model_selection import train_test_split
 4 | 
 5 | def load_data(file_path, date_col='date', target_col='target'):
 6 |     """
 7 |     Load time series data from CSV.
 8 |     Assumes the CSV has a date column and a target column.
 9 |     """
10 |     df = pd.read_csv(file_path, parse_dates=[date_col])
11 |     df.set_index(date_col, inplace=True)
12 |     return df
13 | 
14 | def add_lag_features(df, target_col='target', lags=5):
15 |     """
16 |     Add lag features to the dataframe.
17 |     """
18 |     for lag in range(1, lags + 1):
19 |         df[f'lag_{lag}'] = df[target_col].shift(lag)
20 |     df.dropna(inplace=True)
21 |     return df
22 | 
23 | def preprocess_data(df, target_col='target', lags=5, test_size=0.2):
24 |     """
25 |     Preprocess data: add lags, split into train and test.
26 |     """
27 |     df = add_lag_features(df, target_col, lags)
28 |     X = df.drop(columns=[target_col])
29 |     y = df[target_col]
30 |     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=False)
31 |     return X_train, X_test, y_train, y_test
32 | 
33 | def create_sample_data():
34 |     """
35 |     Create sample data for demonstration.
36 |     """
37 |     date_range = pd.date_range(start='2022-01-01', periods=100, freq='D')
38 |     time_series_data = np.cumsum(np.random.randn(100))
39 |     df = pd.DataFrame({'date': date_range, 'target': time_series_data})
40 |     df.set_index('date', inplace=True)
41 |     return df
42 | 


--------------------------------------------------------------------------------
/phyton_project (1).py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """phyton-project
 3 | 
 4 | Automatically generated by Colab.
 5 | 
 6 | Original file is located at
 7 |     https://colab.research.google.com/drive/1ejf8lG6yOr-FjL99Nn2dNliPjCHgpxPz
 8 | """
 9 | 
10 | import pandas as pd
11 | import numpy as np
12 | import xgboost as xgb
13 | import matplotlib.pyplot as plt
14 | import seaborn as sns
15 | 
16 | # Load your actual time series data into a pandas DataFrame (replace this with your data)
17 | # For demonstration purposes, let's create a sample dataset
18 | date_range = pd.date_range(start='2022-01-01', periods=100, freq='D')
19 | time_series_data = np.cumsum(np.random.randn(100))
20 | df = pd.DataFrame({'date': date_range, 'target': time_series_data})
21 | 
22 | # Convert datetime column to features (year, month, day)
23 | df['year'] = df['date'].dt.year
24 | df['month'] = df['date'].dt.month
25 | df['day'] = df['date'].dt.day
26 | 
27 | # Drop the original datetime column
28 | df.drop(columns=['date'], inplace=True)
29 | 
30 | # Split data into train and validation sets
31 | train_size = int(0.8 * len(df))
32 | train, val = df[:train_size], df[train_size:]
33 | 
34 | # Define features and target
35 | X_train, y_train = train.drop(columns=['target']), train['target']
36 | X_val, y_val = val.drop(columns=['target']), val['target']
37 | 
38 | # Train an XGBoost model
39 | model = xgb.XGBRegressor()
40 | model.fit(X_train, y_train)
41 | 
42 | # Make predictions
43 | y_pred = model.predict(X_val)
44 | 
45 | # Visualize observed vs. predicted values
46 | plt.plot(val.index, y_val, label='Observed', color='blue')
47 | plt.plot(val.index, y_pred, label='Predicted', color='red')
48 | plt.xlabel('Date')
49 | plt.ylabel('Value')
50 | plt.title('XGBoost Time Series Forecasting')
51 | plt.legend()
52 | plt.show()


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | # Configuration settings for the Time Series Forecasting App
 4 | 
 5 | # Default hyperparameters for models
 6 | DEFAULT_PARAMS = {
 7 |     'XGBoost': {
 8 |         'n_estimators': 100,
 9 |         'learning_rate': 0.1,
10 |         'max_depth': 6
11 |     },
12 |     'Random Forest': {
13 |         'n_estimators': 100,
14 |         'max_depth': 10,
15 |         'min_samples_split': 2
16 |     },
17 |     'LSTM': {
18 |         'units': 50,
19 |         'epochs': 10,
20 |         'batch_size': 32,
21 |         'dropout': 0.2
22 |     }
23 | }
24 | 
25 | # Hyperparameter tuning grids
26 | TUNING_GRIDS = {
27 |     'XGBoost': {
28 |         'n_estimators': [50, 100, 150],
29 |         'learning_rate': [0.01, 0.1, 0.2],
30 |         'max_depth': [3, 6, 9]
31 |     },
32 |     'Random Forest': {
33 |         'n_estimators': [50, 100, 150],
34 |         'max_depth': [5, 10, 15],
35 |         'min_samples_split': [2, 5, 10]
36 |     }
37 | }
38 | 
39 | # Data preprocessing settings
40 | DATA_SETTINGS = {
41 |     'default_lags': 5,
42 |     'test_size': 0.2,
43 |     'date_col': 'date',
44 |     'target_col': 'target'
45 | }
46 | 
47 | # Visualization settings
48 | VIZ_SETTINGS = {
49 |     'figsize': (10, 6),
50 |     'colors': {
51 |         'observed': 'blue',
52 |         'predicted': 'red',
53 |         'residuals': 'green'
54 |     }
55 | }
56 | 
57 | # App settings
58 | APP_SETTINGS = {
59 |     'title': 'Advanced Time Series Forecasting Web App',
60 |     'sidebar_title': 'Configuration',
61 |     'default_model': 'XGBoost'
62 | }
63 | 
64 | # File paths
65 | DATA_DIR = 'data'
66 | MODEL_DIR = 'models'
67 | RESULTS_DIR = 'results'
68 | 
69 | # Create directories if they don't exist
70 | for dir_path in [DATA_DIR, MODEL_DIR, RESULTS_DIR]:
71 |     if not os.path.exists(dir_path):
72 |         os.makedirs(dir_path)
73 | 


--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
 1 | # TODO List for Enhancing Time Series Forecasting Project
 2 | 
 3 | ## Step 1: Create requirements.txt ✅
 4 | - List all necessary dependencies: streamlit, tensorflow, scikit-learn, pandas, numpy, matplotlib, seaborn, xgboost
 5 | 
 6 | ## Step 2: Create data_loader.py ✅
 7 | - Implement functions for loading CSV data, preprocessing (handling dates, adding lag features), splitting into train/val
 8 | 
 9 | ## Step 3: Create models.py ✅
10 | - Implement XGBoost, Random Forest, and LSTM models with hyperparameter tuning support
11 | 
12 | ## Step 4: Create evaluation.py ✅
13 | - Implement evaluation metrics: MAE, RMSE, MAPE
14 | 
15 | ## Step 5: Create viz.py ✅
16 | - Implement visualization functions: observed vs predicted plots, residuals, etc.
17 | 
18 | ## Step 6: Create app.py ✅
19 | - Build Streamlit web app with UI for data upload, feature selection, model choice, hyperparams, and displaying results
20 | 
21 | ## Step 7: Install dependencies ✅
22 | - Run pip install -r requirements.txt
23 | 
24 | ## Step 8: Run and test the Streamlit app ✅
25 | - Execute streamlit run app.py and verify all features work
26 | 
27 | ## Step 9: Verify and finalize ✅
28 | - Check for any bugs, ensure performance optimizations are in place
29 | 
30 | ## Step 10: Add Advanced Features ✅
31 | - Create config.py for configuration management
32 | - Create utils.py for utility functions (validation, scaling, logging, etc.)
33 | - Create advanced_models.py with additional models (Gradient Boosting, AdaBoost, SVR, MLP, Bidirectional LSTM, GRU, ARIMA, SARIMA)
34 | - Create advanced_evaluation.py with comprehensive metrics (SMAPE, MASE, MDA, etc.)
35 | - Create advanced_viz.py with interactive plots and advanced visualizations
36 | - Update requirements.txt with additional dependencies (plotly, statsmodels, scipy, joblib)
37 | - Enhance app.py with new models, advanced metrics, and interactive visualizations
38 | 
39 | ## Step 11: Final Testing and Documentation ✅
40 | - Test all new features and models
41 | - Ensure backward compatibility
42 | - Add proper error handling
43 | - Create README.md with project description and usage instructions
44 | 


--------------------------------------------------------------------------------
/models.py:
--------------------------------------------------------------------------------
 1 | import xgboost as xgb
 2 | from sklearn.ensemble import RandomForestRegressor
 3 | from sklearn.model_selection import GridSearchCV
 4 | from tensorflow.keras.models import Sequential
 5 | from tensorflow.keras.layers import LSTM, Dense
 6 | import numpy as np
 7 | 
 8 | def train_xgboost(X_train, y_train, params=None):
 9 |     """
10 |     Train XGBoost model with optional hyperparameter tuning.
11 |     """
12 |     if params is None:
13 |         params = {'n_estimators': 100, 'learning_rate': 0.1}
14 |     model = xgb.XGBRegressor(**params)
15 |     model.fit(X_train, y_train)
16 |     return model
17 | 
18 | def train_random_forest(X_train, y_train, params=None):
19 |     """
20 |     Train Random Forest model with optional hyperparameter tuning.
21 |     """
22 |     if params is None:
23 |         params = {'n_estimators': 100, 'max_depth': 10}
24 |     model = RandomForestRegressor(**params)
25 |     model.fit(X_train, y_train)
26 |     return model
27 | 
28 | def train_lstm(X_train, y_train, params=None):
29 |     """
30 |     Train LSTM model.
31 |     Note: LSTM requires 3D input (samples, timesteps, features).
32 |     Assuming X_train is reshaped appropriately.
33 |     """
34 |     if params is None:
35 |         params = {'units': 50, 'epochs': 10, 'batch_size': 32}
36 |     # Reshape for LSTM: assuming univariate, timesteps=1 for simplicity
37 |     X_train_reshaped = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
38 |     model = Sequential()
39 |     model.add(LSTM(params['units'], input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])))
40 |     model.add(Dense(1))
41 |     model.compile(optimizer='adam', loss='mse')
42 |     model.fit(X_train_reshaped, y_train, epochs=params['epochs'], batch_size=params['batch_size'], verbose=0)
43 |     return model
44 | 
45 | def tune_hyperparameters(model_type, X_train, y_train, param_grid):
46 |     """
47 |     Perform hyperparameter tuning using GridSearchCV.
48 |     """
49 |     if model_type == 'xgboost':
50 |         model = xgb.XGBRegressor()
51 |     elif model_type == 'random_forest':
52 |         model = RandomForestRegressor()
53 |     else:
54 |         raise ValueError("Unsupported model type for tuning")
55 |     grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error')
56 |     grid_search.fit(X_train, y_train)
57 |     return grid_search.best_estimator_, grid_search.best_params_
58 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | from sklearn.preprocessing import StandardScaler, MinMaxScaler
  4 | import logging
  5 | 
  6 | # Set up logging
  7 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  8 | logger = logging.getLogger(__name__)
  9 | 
 10 | def setup_logging(log_level=logging.INFO):
 11 |     """
 12 |     Set up logging configuration.
 13 |     """
 14 |     logging.basicConfig(level=log_level, format='%(asctime)s - %(levelname)s - %(message)s')
 15 | 
 16 | def validate_data(df, date_col='date', target_col='target'):
 17 |     """
 18 |     Validate the input data.
 19 |     """
 20 |     if df.empty:
 21 |         raise ValueError("DataFrame is empty")
 22 |     if date_col not in df.columns:
 23 |         raise ValueError(f"Date column '{date_col}' not found in data")
 24 |     if target_col not in df.columns:
 25 |         raise ValueError(f"Target column '{target_col}' not found in data")
 26 |     if not pd.api.types.is_datetime64_any_dtype(df[date_col]):
 27 |         raise ValueError(f"Date column '{date_col}' is not in datetime format")
 28 |     logger.info("Data validation passed")
 29 | 
 30 | def scale_features(X_train, X_test, method='standard'):
 31 |     """
 32 |     Scale features using StandardScaler or MinMaxScaler.
 33 |     """
 34 |     if method == 'standard':
 35 |         scaler = StandardScaler()
 36 |     elif method == 'minmax':
 37 |         scaler = MinMaxScaler()
 38 |     else:
 39 |         raise ValueError("Invalid scaling method. Choose 'standard' or 'minmax'")
 40 | 
 41 |     X_train_scaled = scaler.fit_transform(X_train)
 42 |     X_test_scaled = scaler.transform(X_test)
 43 |     return X_train_scaled, X_test_scaled, scaler
 44 | 
 45 | def inverse_scale_predictions(scaled_pred, scaler, original_y):
 46 |     """
 47 |     Inverse scale predictions if target was scaled.
 48 |     """
 49 |     # Assuming target is not scaled in this implementation
 50 |     return scaled_pred
 51 | 
 52 | def save_model(model, filename):
 53 |     """
 54 |     Save trained model to file.
 55 |     """
 56 |     import joblib
 57 |     joblib.dump(model, filename)
 58 |     logger.info(f"Model saved to {filename}")
 59 | 
 60 | def load_model(filename):
 61 |     """
 62 |     Load model from file.
 63 |     """
 64 |     import joblib
 65 |     model = joblib.load(filename)
 66 |     logger.info(f"Model loaded from {filename}")
 67 |     return model
 68 | 
 69 | def calculate_forecast_accuracy(y_true, y_pred, threshold=0.1):
 70 |     """
 71 |     Calculate forecast accuracy based on a threshold.
 72 |     """
 73 |     accuracy = np.mean(np.abs((y_true - y_pred) / y_true) < threshold) * 100
 74 |     return accuracy
 75 | 
 76 | def generate_forecast_report(metrics, model_name):
 77 |     """
 78 |     Generate a summary report of the forecast results.
 79 |     """
 80 |     report = f"""
 81 |     Forecast Report for {model_name}
 82 |     ================================
 83 |     MAE: {metrics['MAE']:.4f}
 84 |     RMSE: {metrics['RMSE']:.4f}
 85 |     MAPE: {metrics['MAPE']:.4f}%
 86 |     """
 87 |     return report
 88 | 
 89 | def detect_outliers(df, column, method='iqr', threshold=1.5):
 90 |     """
 91 |     Detect outliers in a column using IQR or Z-score method.
 92 |     """
 93 |     if method == 'iqr':
 94 |         Q1 = df[column].quantile(0.25)
 95 |         Q3 = df[column].quantile(0.75)
 96 |         IQR = Q3 - Q1
 97 |         lower_bound = Q1 - threshold * IQR
 98 |         upper_bound = Q3 + threshold * IQR
 99 |         outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)]
100 |     elif method == 'zscore':
101 |         z_scores = np.abs((df[column] - df[column].mean()) / df[column].std())
102 |         outliers = df[z_scores > threshold]
103 |     else:
104 |         raise ValueError("Invalid outlier detection method. Choose 'iqr' or 'zscore'")
105 |     return outliers
106 | 
107 | def handle_missing_values(df, method='interpolate'):
108 |     """
109 |     Handle missing values in the dataframe.
110 |     """
111 |     if method == 'interpolate':
112 |         df = df.interpolate(method='linear')
113 |     elif method == 'forward_fill':
114 |         df = df.fillna(method='ffill')
115 |     elif method == 'backward_fill':
116 |         df = df.fillna(method='bfill')
117 |     elif method == 'drop':
118 |         df = df.dropna()
119 |     else:
120 |         raise ValueError("Invalid missing value handling method")
121 |     return df
122 | 
123 | def create_time_features(df, date_col='date'):
124 |     """
125 |     Create additional time-based features from date column.
126 |     """
127 |     df = df.copy()
128 |     df['year'] = df[date_col].dt.year
129 |     df['month'] = df[date_col].dt.month
130 |     df['day'] = df[date_col].dt.day
131 |     df['day_of_week'] = df[date_col].dt.dayofweek
132 |     df['quarter'] = df[date_col].dt.quarter
133 |     df['is_weekend'] = df[date_col].dt.dayofweek.isin([5, 6]).astype(int)
134 |     return df
135 | 


--------------------------------------------------------------------------------
/advanced_models.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
  4 | from sklearn.svm import SVR
  5 | from sklearn.neural_network import MLPRegressor
  6 | from tensorflow.keras.models import Sequential
  7 | from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, GRU
  8 | from tensorflow.keras.callbacks import EarlyStopping
  9 | from statsmodels.tsa.arima.model import ARIMA
 10 | from statsmodels.tsa.statespace.sarimax import SARIMAX
 11 | import warnings
 12 | warnings.filterwarnings('ignore')
 13 | 
 14 | def train_gradient_boosting(X_train, y_train, params=None):
 15 |     """
 16 |     Train Gradient Boosting model.
 17 |     """
 18 |     if params is None:
 19 |         params = {'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 3}
 20 |     model = GradientBoostingRegressor(**params)
 21 |     model.fit(X_train, y_train)
 22 |     return model
 23 | 
 24 | def train_ada_boost(X_train, y_train, params=None):
 25 |     """
 26 |     Train AdaBoost model.
 27 |     """
 28 |     if params is None:
 29 |         params = {'n_estimators': 50, 'learning_rate': 1.0}
 30 |     model = AdaBoostRegressor(**params)
 31 |     model.fit(X_train, y_train)
 32 |     return model
 33 | 
 34 | def train_svr(X_train, y_train, params=None):
 35 |     """
 36 |     Train Support Vector Regression model.
 37 |     """
 38 |     if params is None:
 39 |         params = {'kernel': 'rbf', 'C': 1.0, 'epsilon': 0.1}
 40 |     model = SVR(**params)
 41 |     model.fit(X_train, y_train)
 42 |     return model
 43 | 
 44 | def train_mlp(X_train, y_train, params=None):
 45 |     """
 46 |     Train Multi-Layer Perceptron model.
 47 |     """
 48 |     if params is None:
 49 |         params = {'hidden_layer_sizes': (100, 50), 'activation': 'relu', 'max_iter': 500}
 50 |     model = MLPRegressor(**params)
 51 |     model.fit(X_train, y_train)
 52 |     return model
 53 | 
 54 | def train_bidirectional_lstm(X_train, y_train, params=None):
 55 |     """
 56 |     Train Bidirectional LSTM model.
 57 |     """
 58 |     if params is None:
 59 |         params = {'units': 50, 'epochs': 10, 'batch_size': 32, 'dropout': 0.2}
 60 | 
 61 |     X_train_reshaped = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
 62 |     model = Sequential()
 63 |     model.add(Bidirectional(LSTM(params['units'], return_sequences=True), input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])))
 64 |     model.add(Dropout(params['dropout']))
 65 |     model.add(LSTM(params['units'] // 2))
 66 |     model.add(Dropout(params['dropout']))
 67 |     model.add(Dense(1))
 68 |     model.compile(optimizer='adam', loss='mse')
 69 | 
 70 |     early_stopping = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
 71 |     model.fit(X_train_reshaped, y_train, epochs=params['epochs'], batch_size=params['batch_size'], verbose=0, callbacks=[early_stopping])
 72 |     return model
 73 | 
 74 | def train_gru(X_train, y_train, params=None):
 75 |     """
 76 |     Train GRU model.
 77 |     """
 78 |     if params is None:
 79 |         params = {'units': 50, 'epochs': 10, 'batch_size': 32, 'dropout': 0.2}
 80 | 
 81 |     X_train_reshaped = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
 82 |     model = Sequential()
 83 |     model.add(GRU(params['units'], input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])))
 84 |     model.add(Dropout(params['dropout']))
 85 |     model.add(Dense(1))
 86 |     model.compile(optimizer='adam', loss='mse')
 87 | 
 88 |     early_stopping = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
 89 |     model.fit(X_train_reshaped, y_train, epochs=params['epochs'], batch_size=params['batch_size'], verbose=0, callbacks=[early_stopping])
 90 |     return model
 91 | 
 92 | def train_arima(y_train, order=(5, 1, 0)):
 93 |     """
 94 |     Train ARIMA model.
 95 |     """
 96 |     model = ARIMA(y_train, order=order)
 97 |     model_fit = model.fit()
 98 |     return model_fit
 99 | 
100 | def train_sarima(y_train, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12)):
101 |     """
102 |     Train SARIMA model.
103 |     """
104 |     model = SARIMAX(y_train, order=order, seasonal_order=seasonal_order)
105 |     model_fit = model.fit(disp=False)
106 |     return model_fit
107 | 
108 | def ensemble_forecast(models, X_test, weights=None):
109 |     """
110 |     Create ensemble forecast from multiple models.
111 |     """
112 |     if weights is None:
113 |         weights = [1/len(models)] * len(models)
114 | 
115 |     predictions = []
116 |     for model in models:
117 |         if hasattr(model, 'predict'):
118 |             pred = model.predict(X_test)
119 |         else:
120 |             # For statsmodels models
121 |             pred = model.forecast(steps=len(X_test))
122 |         predictions.append(pred)
123 | 
124 |     # Weighted average
125 |     ensemble_pred = np.average(predictions, axis=0, weights=weights)
126 |     return ensemble_pred
127 | 
128 | def train_stacked_model(base_models, meta_model, X_train, y_train, X_val, y_val):
129 |     """
130 |     Train a stacked ensemble model.
131 |     """
132 |     # Get predictions from base models
133 |     base_predictions = []
134 |     for model in base_models:
135 |         model.fit(X_train, y_train)
136 |         pred = model.predict(X_val)
137 |         base_predictions.append(pred)
138 | 
139 |     # Create meta features
140 |     meta_features = np.column_stack(base_predictions)
141 | 
142 |     # Train meta model
143 |     meta_model.fit(meta_features, y_val)
144 | 
145 |     return base_models, meta_model
146 | 


--------------------------------------------------------------------------------
/advanced_evaluation.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
  4 | from scipy.stats import shapiro, normaltest
  5 | import warnings
  6 | warnings.filterwarnings('ignore')
  7 | 
  8 | def mean_absolute_percentage_error(y_true, y_pred):
  9 |     """
 10 |     Calculate Mean Absolute Percentage Error (MAPE).
 11 |     """
 12 |     y_true, y_pred = np.array(y_true), np.array(y_pred)
 13 |     return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
 14 | 
 15 | def symmetric_mean_absolute_percentage_error(y_true, y_pred):
 16 |     """
 17 |     Calculate Symmetric Mean Absolute Percentage Error (SMAPE).
 18 |     """
 19 |     y_true, y_pred = np.array(y_true), np.array(y_pred)
 20 |     return 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)))
 21 | 
 22 | def mean_absolute_scaled_error(y_true, y_pred, y_train=None, m=1):
 23 |     """
 24 |     Calculate Mean Absolute Scaled Error (MASE).
 25 |     """
 26 |     y_true, y_pred = np.array(y_true), np.array(y_pred)
 27 |     mae = mean_absolute_error(y_true, y_pred)
 28 | 
 29 |     if y_train is not None:
 30 |         # Calculate naive forecast MAE
 31 |         y_train = np.array(y_train)
 32 |         naive_errors = []
 33 |         for i in range(m, len(y_train)):
 34 |             naive_errors.append(abs(y_train[i] - y_train[i-m]))
 35 |         naive_mae = np.mean(naive_errors)
 36 |     else:
 37 |         # Use simple naive method (previous value)
 38 |         naive_mae = np.mean(np.abs(np.diff(y_true)))
 39 | 
 40 |     return mae / naive_mae
 41 | 
 42 | def root_mean_squared_percentage_error(y_true, y_pred):
 43 |     """
 44 |     Calculate Root Mean Squared Percentage Error (RMSPE).
 45 |     """
 46 |     y_true, y_pred = np.array(y_true), np.array(y_pred)
 47 |     return np.sqrt(np.mean(((y_true - y_pred) / y_true) ** 2)) * 100
 48 | 
 49 | def mean_directional_accuracy(y_true, y_pred):
 50 |     """
 51 |     Calculate Mean Directional Accuracy (MDA).
 52 |     """
 53 |     y_true, y_pred = np.array(y_true), np.array(y_pred)
 54 |     actual_direction = np.sign(np.diff(y_true))
 55 |     pred_direction = np.sign(np.diff(y_pred))
 56 |     return np.mean(actual_direction == pred_direction) * 100
 57 | 
 58 | def theil_u_statistic(y_true, y_pred):
 59 |     """
 60 |     Calculate Theil's U statistic.
 61 |     """
 62 |     y_true, y_pred = np.array(y_true), np.array(y_pred)
 63 |     naive_pred = np.roll(y_true, 1)[1:]  # Naive forecast: previous value
 64 |     y_true = y_true[1:]
 65 |     y_pred = y_pred[1:]
 66 | 
 67 |     rmse_model = np.sqrt(mean_squared_error(y_true, y_pred))
 68 |     rmse_naive = np.sqrt(mean_squared_error(y_true, naive_pred))
 69 | 
 70 |     return rmse_model / rmse_naive
 71 | 
 72 | def forecast_bias(y_true, y_pred):
 73 |     """
 74 |     Calculate forecast bias (mean error).
 75 |     """
 76 |     return np.mean(y_pred - y_true)
 77 | 
 78 | def tracking_signal(y_true, y_pred, cumulative=True):
 79 |     """
 80 |     Calculate tracking signal.
 81 |     """
 82 |     errors = y_pred - y_true
 83 |     if cumulative:
 84 |         cum_errors = np.cumsum(errors)
 85 |         cum_abs_errors = np.cumsum(np.abs(errors))
 86 |         return cum_errors / cum_abs_errors
 87 |     else:
 88 |         return errors / np.abs(errors)
 89 | 
 90 | def residual_analysis(y_true, y_pred):
 91 |     """
 92 |     Perform comprehensive residual analysis.
 93 |     """
 94 |     residuals = y_true - y_pred
 95 | 
 96 |     # Normality tests
 97 |     shapiro_stat, shapiro_p = shapiro(residuals)
 98 |     dagostino_stat, dagostino_p = normaltest(residuals)
 99 | 
100 |     # Autocorrelation (simple lag-1)
101 |     autocorr = np.corrcoef(residuals[:-1], residuals[1:])[0, 1]
102 | 
103 |     # Heteroscedasticity test (simple: correlation between |residuals| and predictions)
104 |     hetero_corr = np.corrcoef(np.abs(residuals), y_pred)[0, 1]
105 | 
106 |     analysis = {
107 |         'mean_residual': np.mean(residuals),
108 |         'std_residual': np.std(residuals),
109 |         'shapiro_normality': {'statistic': shapiro_stat, 'p_value': shapiro_p},
110 |         'dagostino_normality': {'statistic': dagostino_stat, 'p_value': dagostino_p},
111 |         'autocorrelation_lag1': autocorr,
112 |         'heteroscedasticity_corr': hetero_corr
113 |     }
114 | 
115 |     return analysis
116 | 
117 | def comprehensive_evaluation(y_true, y_pred, y_train=None):
118 |     """
119 |     Comprehensive model evaluation with multiple metrics.
120 |     """
121 |     metrics = {
122 |         'MAE': mean_absolute_error(y_true, y_pred),
123 |         'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
124 |         'MAPE': mean_absolute_percentage_error(y_true, y_pred),
125 |         'SMAPE': symmetric_mean_absolute_percentage_error(y_true, y_pred),
126 |         'RMSPE': root_mean_squared_percentage_error(y_true, y_pred),
127 |         'R2': r2_score(y_true, y_pred),
128 |         'MDA': mean_directional_accuracy(y_true, y_pred),
129 |         'Theil_U': theil_u_statistic(y_true, y_pred),
130 |         'Forecast_Bias': forecast_bias(y_true, y_pred),
131 |         'MASE': mean_absolute_scaled_error(y_true, y_pred, y_train)
132 |     }
133 | 
134 |     # Residual analysis
135 |     metrics['Residual_Analysis'] = residual_analysis(y_true, y_pred)
136 | 
137 |     return metrics
138 | 
139 | def print_evaluation_report(metrics, model_name="Model"):
140 |     """
141 |     Print a formatted evaluation report.
142 |     """
143 |     report = f"""
144 |     === {model_name} Evaluation Report ===
145 |     ======================================
146 | 
147 |     Error Metrics:
148 |     --------------
149 |     MAE:           {metrics['MAE']:.4f}
150 |     RMSE:          {metrics['RMSE']:.4f}
151 |     MAPE:          {metrics['MAPE']:.4f}%
152 |     SMAPE:         {metrics['SMAPE']:.4f}%
153 |     RMSPE:         {metrics['RMSPE']:.4f}%
154 |     MASE:          {metrics['MASE']:.4f}
155 | 
156 |     Accuracy Metrics:
157 |     -----------------
158 |     R² Score:       {metrics['R2']:.4f}
159 |     MDA:            {metrics['MDA']:.4f}%
160 | 
161 |     Forecast Quality:
162 |     -----------------
163 |     Theil's U:      {metrics['Theil_U']:.4f}
164 |     Forecast Bias:  {metrics['Forecast_Bias']:.4f}
165 | 
166 |     Residual Analysis:
167 |     ------------------
168 |     Mean Residual:  {metrics['Residual_Analysis']['mean_residual']:.4f}
169 |     Std Residual:   {metrics['Residual_Analysis']['std_residual']:.4f}
170 |     Autocorr (lag1): {metrics['Residual_Analysis']['autocorrelation_lag1']:.4f}
171 |     Hetero Corr:    {metrics['Residual_Analysis']['heteroscedasticity_corr']:.4f}
172 | 
173 |     Normality Tests:
174 |     ----------------
175 |     Shapiro-Wilk:   p-value = {metrics['Residual_Analysis']['shapiro_normality']['p_value']:.4f}
176 |     D'Agostino:     p-value = {metrics['Residual_Analysis']['dagostino_normality']['p_value']:.4f}
177 |     """
178 | 
179 |     print(report)
180 |     return report
181 | 


--------------------------------------------------------------------------------
/advanced_viz.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import seaborn as sns
  3 | import pandas as pd
  4 | import numpy as np
  5 | from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
  6 | import plotly.graph_objects as go
  7 | import plotly.express as px
  8 | from plotly.subplots import make_subplots
  9 | import warnings
 10 | warnings.filterwarnings('ignore')
 11 | 
 12 | def plot_forecast_interactive(y_true, y_pred, title='Interactive Forecast vs Actual'):
 13 |     """
 14 |     Create interactive forecast plot using Plotly.
 15 |     """
 16 |     fig = go.Figure()
 17 | 
 18 |     fig.add_trace(go.Scatter(x=y_true.index, y=y_true, mode='lines', name='Observed',
 19 |                              line=dict(color='blue', width=2)))
 20 |     fig.add_trace(go.Scatter(x=y_true.index, y=y_pred, mode='lines', name='Predicted',
 21 |                              line=dict(color='red', width=2, dash='dash')))
 22 | 
 23 |     fig.update_layout(title=title,
 24 |                       xaxis_title='Date',
 25 |                       yaxis_title='Value',
 26 |                       hovermode='x unified')
 27 | 
 28 |     return fig
 29 | 
 30 | def plot_residuals_analysis(y_true, y_pred, figsize=(15, 10)):
 31 |     """
 32 |     Comprehensive residuals analysis plots.
 33 |     """
 34 |     residuals = y_true - y_pred
 35 | 
 36 |     fig, axes = plt.subplots(2, 3, figsize=figsize)
 37 | 
 38 |     # Residuals over time
 39 |     axes[0, 0].plot(residuals.index, residuals, color='green', alpha=0.7)
 40 |     axes[0, 0].axhline(0, color='black', linestyle='--')
 41 |     axes[0, 0].set_title('Residuals Over Time')
 42 |     axes[0, 0].set_xlabel('Date')
 43 |     axes[0, 0].set_ylabel('Residuals')
 44 | 
 45 |     # Residuals distribution
 46 |     sns.histplot(residuals, kde=True, ax=axes[0, 1])
 47 |     axes[0, 1].set_title('Residuals Distribution')
 48 | 
 49 |     # Q-Q plot
 50 |     from scipy import stats
 51 |     stats.probplot(residuals, dist="norm", plot=axes[0, 2])
 52 |     axes[0, 2].set_title('Q-Q Plot')
 53 | 
 54 |     # Residuals vs Fitted
 55 |     axes[1, 0].scatter(y_pred, residuals, alpha=0.5)
 56 |     axes[1, 0].axhline(0, color='red', linestyle='--')
 57 |     axes[1, 0].set_title('Residuals vs Fitted Values')
 58 |     axes[1, 0].set_xlabel('Fitted Values')
 59 |     axes[1, 0].set_ylabel('Residuals')
 60 | 
 61 |     # Autocorrelation
 62 |     max_lags_acf = min(20, len(residuals) - 1)
 63 |     plot_acf(residuals, ax=axes[1, 1], lags=max_lags_acf)
 64 |     axes[1, 1].set_title('Residuals Autocorrelation')
 65 | 
 66 |     # Partial Autocorrelation
 67 |     max_lags_pacf = min(10, len(residuals) // 2 - 1)
 68 |     if max_lags_pacf > 0:
 69 |         plot_pacf(residuals, ax=axes[1, 2], lags=max_lags_pacf)
 70 |         axes[1, 2].set_title('Residuals Partial Autocorrelation')
 71 |     else:
 72 |         axes[1, 2].text(0.5, 0.5, 'Not enough data\nfor PACF', ha='center', va='center', transform=axes[1, 2].transAxes)
 73 |         axes[1, 2].set_title('Residuals Partial Autocorrelation')
 74 | 
 75 |     plt.tight_layout()
 76 |     return fig
 77 | 
 78 | def plot_model_comparison(models_metrics, metric='RMSE'):
 79 |     """
 80 |     Plot comparison of different models.
 81 |     """
 82 |     model_names = list(models_metrics.keys())
 83 |     values = [models_metrics[name][metric] for name in model_names]
 84 | 
 85 |     fig, ax = plt.subplots(figsize=(10, 6))
 86 |     bars = ax.bar(model_names, values, color='skyblue', edgecolor='navy', linewidth=1)
 87 | 
 88 |     ax.set_title(f'Model Comparison - {metric}')
 89 |     ax.set_xlabel('Models')
 90 |     ax.set_ylabel(metric)
 91 |     ax.tick_params(axis='x', rotation=45)
 92 | 
 93 |     # Add value labels on bars
 94 |     for bar, value in zip(bars, values):
 95 |         ax.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
 96 |                 f'{value:.4f}', ha='center', va='bottom')
 97 | 
 98 |     plt.tight_layout()
 99 |     return fig
100 | 
101 | def plot_feature_importance(model, feature_names, top_n=20):
102 |     """
103 |     Plot feature importance for tree-based models.
104 |     """
105 |     if hasattr(model, 'feature_importances_'):
106 |         importance = model.feature_importances_
107 |         indices = np.argsort(importance)[::-1][:top_n]
108 | 
109 |         plt.figure(figsize=(10, 8))
110 |         plt.title('Feature Importances')
111 |         plt.bar(range(top_n), importance[indices], align='center')
112 |         plt.xticks(range(top_n), [feature_names[i] for i in indices], rotation=90)
113 |         plt.tight_layout()
114 |         return plt.gcf()
115 |     else:
116 |         print("Model does not have feature_importances_ attribute")
117 |         return None
118 | 
119 | def plot_learning_curve(model, X_train, y_train, cv=5):
120 |     """
121 |     Plot learning curve for a model.
122 |     """
123 |     from sklearn.model_selection import learning_curve
124 | 
125 |     train_sizes, train_scores, val_scores = learning_curve(
126 |         model, X_train, y_train, cv=cv, n_jobs=-1,
127 |         train_sizes=np.linspace(0.1, 1.0, 10), scoring='neg_mean_squared_error'
128 |     )
129 | 
130 |     train_scores_mean = -np.mean(train_scores, axis=1)
131 |     train_scores_std = np.std(train_scores, axis=1)
132 |     val_scores_mean = -np.mean(val_scores, axis=1)
133 |     val_scores_std = np.std(val_scores, axis=1)
134 | 
135 |     plt.figure(figsize=(10, 6))
136 |     plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
137 |                      train_scores_mean + train_scores_std, alpha=0.1, color="r")
138 |     plt.fill_between(train_sizes, val_scores_mean - val_scores_std,
139 |                      val_scores_mean + val_scores_std, alpha=0.1, color="g")
140 |     plt.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score")
141 |     plt.plot(train_sizes, val_scores_mean, 'o-', color="g", label="Cross-validation score")
142 | 
143 |     plt.title('Learning Curve')
144 |     plt.xlabel('Training Size')
145 |     plt.ylabel('MSE')
146 |     plt.legend(loc="best")
147 |     plt.grid(True)
148 |     plt.tight_layout()
149 |     return plt.gcf()
150 | 
151 | def plot_prediction_intervals(y_true, y_pred, lower_bound, upper_bound, title='Prediction Intervals'):
152 |     """
153 |     Plot predictions with confidence intervals.
154 |     """
155 |     plt.figure(figsize=(12, 6))
156 |     plt.plot(y_true.index, y_true, label='Observed', color='blue', linewidth=2)
157 |     plt.plot(y_true.index, y_pred, label='Predicted', color='red', linewidth=2)
158 |     plt.fill_between(y_true.index, lower_bound, upper_bound, alpha=0.3, color='red', label='95% Prediction Interval')
159 | 
160 |     plt.title(title)
161 |     plt.xlabel('Date')
162 |     plt.ylabel('Value')
163 |     plt.legend()
164 |     plt.grid(True, alpha=0.3)
165 |     plt.tight_layout()
166 |     return plt.gcf()
167 | 
168 | def create_dashboard(y_true, y_pred, metrics, model_name):
169 |     """
170 |     Create a comprehensive dashboard with multiple plots.
171 |     """
172 |     fig = make_subplots(
173 |         rows=3, cols=2,
174 |         subplot_titles=('Forecast vs Actual', 'Residuals Distribution',
175 |                         'Residuals Over Time', 'Q-Q Plot',
176 |                         'Model Metrics', 'Feature Importance (if available)'),
177 |         specs=[[{"secondary_y": False}, {"secondary_y": False}],
178 |                [{"secondary_y": False}, {"secondary_y": False}],
179 |                [{"secondary_y": False}, {"secondary_y": False}]]
180 |     )
181 | 
182 |     # Forecast vs Actual
183 |     fig.add_trace(go.Scatter(x=y_true.index, y=y_true, mode='lines', name='Observed',
184 |                              line=dict(color='blue')), row=1, col=1)
185 |     fig.add_trace(go.Scatter(x=y_true.index, y=y_pred, mode='lines', name='Predicted',
186 |                              line=dict(color='red')), row=1, col=1)
187 | 
188 |     # Residuals Distribution
189 |     residuals = y_true - y_pred
190 |     fig.add_trace(go.Histogram(x=residuals, nbinsx=30, name='Residuals'), row=1, col=2)
191 | 
192 |     # Residuals Over Time
193 |     fig.add_trace(go.Scatter(x=y_true.index, y=residuals, mode='lines', name='Residuals Over Time',
194 |                              line=dict(color='green')), row=2, col=1)
195 | 
196 |     # Q-Q Plot
197 |     from scipy import stats
198 |     qq = stats.probplot(residuals, dist="norm")
199 |     fig.add_trace(go.Scatter(x=qq[0][0], y=qq[0][1], mode='markers', name='Q-Q Plot'), row=2, col=2)
200 | 
201 |     # Model Metrics
202 |     metrics_text = "<br>".join([f"{k}: {v:.4f}" for k, v in metrics.items() if isinstance(v, (int, float))])
203 |     fig.add_trace(go.Table(
204 |         header=dict(values=['Metric', 'Value']),
205 |         cells=dict(values=[list(metrics.keys()), [f"{v:.4f}" if isinstance(v, (int, float)) else str(v) for v in metrics.values()]])
206 |     ), row=3, col=1)
207 | 
208 |     # Placeholder for Feature Importance
209 |     fig.add_trace(go.Bar(x=['Feature 1', 'Feature 2'], y=[0.5, 0.3], name='Feature Importance'), row=3, col=2)
210 | 
211 |     fig.update_layout(height=1200, title_text=f"{model_name} - Comprehensive Dashboard")
212 |     return fig
213 | 
214 | def plot_seasonal_decomposition(ts, model='additive', period=None):
215 |     """
216 |     Plot seasonal decomposition of time series.
217 |     """
218 |     from statsmodels.tsa.seasonal import seasonal_decompose
219 | 
220 |     if period is None:
221 |         period = 12  # Assume monthly data
222 | 
223 |     decomposition = seasonal_decompose(ts, model=model, period=period)
224 | 
225 |     fig, axes = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
226 | 
227 |     axes[0].plot(ts, label='Original')
228 |     axes[0].legend()
229 |     axes[0].set_title('Original Time Series')
230 | 
231 |     axes[1].plot(decomposition.trend, label='Trend')
232 |     axes[1].legend()
233 |     axes[1].set_title('Trend Component')
234 | 
235 |     axes[2].plot(decomposition.seasonal, label='Seasonal')
236 |     axes[2].legend()
237 |     axes[2].set_title('Seasonal Component')
238 | 
239 |     axes[3].plot(decomposition.resid, label='Residual')
240 |     axes[3].legend()
241 |     axes[3].set_title('Residual Component')
242 | 
243 |     plt.tight_layout()
244 |     return fig
245 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 🚀 Advanced Time Series Forecasting Tool
  2 | 
  3 | A comprehensive, enterprise-grade Streamlit web application for professional time series forecasting with cutting-edge AI capabilities. This tool combines 13+ forecasting models, advanced evaluation metrics, interactive visualizations, and AI-powered insights to deliver state-of-the-art time series analysis.
  4 | 
  5 | ## 🌟 Key Features
  6 | 
  7 | ### 🤖 **13 Advanced Forecasting Models**
  8 | - **Machine Learning**: XGBoost, Random Forest, Gradient Boosting, AdaBoost, SVR, MLP
  9 | - **Deep Learning**: LSTM, Bidirectional LSTM, GRU
 10 | - **Statistical**: ARIMA, SARIMA
 11 | - **Ensemble Methods**: Voting Regressor, Stacking Ensemble
 12 | 
 13 | ### 📊 **Comprehensive Evaluation Suite**
 14 | - **Basic Metrics**: MAE, RMSE, MAPE, R² Score
 15 | - **Advanced Metrics**: SMAPE, RMSPE, MASE, MDA, Theil's U, Forecast Bias
 16 | - **Statistical Tests**: Normality, autocorrelation, heteroscedasticity analysis
 17 | 
 18 | ### 🎨 **Interactive Visualizations**
 19 | - Plotly-powered interactive forecast plots
 20 | - Comprehensive residual analysis (distribution, ACF, Q-Q plots)
 21 | - Seasonal decomposition with trend/seasonal/residual components
 22 | - Model comparison dashboards
 23 | - Feature importance analysis with SHAP values
 24 | 
 25 | ### 🛠️ **Advanced Data Processing**
 26 | - Automatic data validation and intelligent cleaning
 27 | - Smart lag feature generation for temporal dependencies
 28 | - Multiple missing value imputation strategies
 29 | - Outlier detection and handling
 30 | - Feature scaling (Standard, MinMax, Robust)
 31 | - Time-based feature engineering
 32 | 
 33 | ### 🎛️ **Professional User Interface**
 34 | - Drag-and-drop CSV upload with real-time validation
 35 | - Dynamic parameter tuning with live updates
 36 | - Hyperparameter optimization with GridSearchCV
 37 | - Model comparison and benchmarking
 38 | - Export capabilities (predictions, metrics, models)
 39 | 
 40 | ### 🔬 **AI-Powered Features**
 41 | - **SHAP Analysis**: Explainable AI for model interpretability
 42 | - **Feature Importance**: Global and local feature impact analysis
 43 | - **Time Series Cross-Validation**: Rolling forecast validation
 44 | - **Automated Model Selection**: Performance-based recommendations
 45 | 
 46 | ## 🚀 Quick Start
 47 | 
 48 | ### Installation
 49 | 
 50 | 1. **Clone the repository**:
 51 | ```bash
 52 | git clone https://github.com/your-username/time-series-forecasting-tool.git
 53 | cd time-series-forecasting-tool
 54 | ```
 55 | 
 56 | 2. **Install dependencies**:
 57 | ```bash
 58 | pip install -r requirements.txt
 59 | ```
 60 | 
 61 | 3. **Launch the application**:
 62 | ```bash
 63 | streamlit run app_advanced.py
 64 | ```
 65 | 
 66 | 4. **Open your browser** to `http://localhost:8501`
 67 | 
 68 | ### Usage Guide
 69 | 
 70 | 1. **📁 Data Upload**: Upload your CSV file or use the built-in sample dataset
 71 | 2. **⚙️ Preprocessing**: Configure lag features, test size, and scaling options
 72 | 3. **🤖 Model Selection**: Choose from 13 forecasting models with pre-configured parameters
 73 | 4. **🔬 Advanced Features**: Enable SHAP analysis, cross-validation, and feature importance
 74 | 5. **📊 Evaluation**: Review comprehensive metrics and statistical analysis
 75 | 6. **📈 Visualization**: Explore interactive charts and residual diagnostics
 76 | 7. **💾 Export**: Download predictions, metrics reports, and trained models
 77 | 
 78 | ## 🏗️ Project Architecture
 79 | 
 80 | ```
 81 | ├── app_advanced.py         # Main advanced Streamlit application
 82 | ├── app.py                  # Basic Streamlit application
 83 | ├── config.py              # Configuration and hyperparameters
 84 | ├── data_loader.py         # Data loading and preprocessing utilities
 85 | ├── models.py              # Core ML models (XGBoost, RF, LSTM)
 86 | ├── advanced_models.py     # Additional models and ensemble methods
 87 | ├── evaluation.py          # Basic evaluation metrics
 88 | ├── advanced_evaluation.py # Comprehensive evaluation suite
 89 | ├── viz.py                 # Basic visualization functions
 90 | ├── advanced_viz.py        # Interactive and advanced visualizations
 91 | ├── utils.py               # Utility functions (validation, scaling, etc.)
 92 | ├── requirements.txt       # Python dependencies
 93 | ├── TODO.md                # Development roadmap
 94 | └── README.md              # This documentation
 95 | ```
 96 | 
 97 | ## 📋 Requirements
 98 | 
 99 | ### Core Dependencies
100 | - **streamlit** (>=1.28.0): Web application framework
101 | - **tensorflow** (>=2.13.0): Deep learning models
102 | - **scikit-learn** (>=1.3.0): Machine learning algorithms
103 | - **pandas** (>=2.0.0): Data manipulation
104 | - **numpy** (>=1.24.0): Numerical computing
105 | 
106 | ### Visualization & Analysis
107 | - **plotly** (>=5.15.0): Interactive visualizations
108 | - **matplotlib** (>=3.7.0): Static plotting
109 | - **seaborn** (>=0.12.0): Statistical visualization
110 | - **statsmodels** (>=0.14.0): Statistical models
111 | 
112 | ### Specialized Libraries
113 | - **xgboost** (>=1.7.0): Gradient boosting
114 | - **shap** (>=0.42.0): Explainable AI (optional)
115 | - **joblib** (>=1.3.0): Model serialization
116 | - **scipy** (>=1.11.0): Scientific computing
117 | 
118 | ## 🎯 Model Capabilities
119 | 
120 | ### Machine Learning Models
121 | | Model | Description | Best For |
122 | |-------|-------------|----------|
123 | | XGBoost | Gradient boosting with trees | High accuracy, feature importance |
124 | | Random Forest | Ensemble of decision trees | Robust, handles missing data |
125 | | Gradient Boosting | Sequential ensemble method | Competitive accuracy |
126 | | AdaBoost | Adaptive boosting | Binary classification adaptation |
127 | | SVR | Support Vector Regression | Non-linear relationships |
128 | | MLP | Neural network | Complex patterns |
129 | 
130 | ### Deep Learning Models
131 | | Model | Description | Best For |
132 | |-------|-------------|----------|
133 | | LSTM | Long Short-Term Memory | Sequential dependencies |
134 | | Bidirectional LSTM | Forward + backward LSTM | Context-aware forecasting |
135 | | GRU | Gated Recurrent Units | Efficient sequential modeling |
136 | 
137 | ### Statistical Models
138 | | Model | Description | Best For |
139 | |-------|-------------|----------|
140 | | ARIMA | AutoRegressive Integrated MA | Stationary time series |
141 | | SARIMA | Seasonal ARIMA | Seasonal patterns |
142 | 
143 | ### Ensemble Methods
144 | | Model | Description | Best For |
145 | |-------|-------------|----------|
146 | | Voting Ensemble | Weighted average of models | Improved stability |
147 | | Stacking Ensemble | Meta-model on base predictions | Maximum accuracy |
148 | 
149 | ## 📊 Evaluation Framework
150 | 
151 | ### Performance Metrics
152 | - **MAE**: Mean Absolute Error - Average magnitude of errors
153 | - **RMSE**: Root Mean Squared Error - Penalizes large errors
154 | - **MAPE**: Mean Absolute Percentage Error - Scale-independent
155 | - **SMAPE**: Symmetric MAPE - Handles zero values
156 | - **MASE**: Mean Absolute Scaled Error - Compares to naive forecast
157 | - **MDA**: Mean Directional Accuracy - Direction prediction accuracy
158 | 
159 | ### Statistical Analysis
160 | - **Normality Tests**: Shapiro-Wilk, Kolmogorov-Smirnov
161 | - **Autocorrelation**: ACF/PACF analysis for residuals
162 | - **Heteroscedasticity**: Breusch-Pagan, White tests
163 | - **Stationarity**: Augmented Dickey-Fuller test
164 | 
165 | ## 🔬 Advanced Features
166 | 
167 | ### SHAP Explainability
168 | - Global feature importance across all predictions
169 | - Local explanations for individual forecasts
170 | - Waterfall plots showing feature contributions
171 | - Summary plots for feature impact analysis
172 | 
173 | ### Cross-Validation
174 | - Time series split validation
175 | - Rolling forecast evaluation
176 | - Performance stability assessment
177 | - Overfitting detection
178 | 
179 | ### Feature Engineering
180 | - Automatic lag feature creation
181 | - Rolling statistics (mean, std, min, max)
182 | - Seasonal indicators
183 | - Calendar features (day of week, month, quarter)
184 | 
185 | ## 🎨 User Interface
186 | 
187 | ### Dashboard Layout
188 | - **Header**: Professional branding with gradient styling
189 | - **Sidebar**: Organized controls for data, preprocessing, and models
190 | - **Main Panel**: Metrics cards, visualizations, and results
191 | - **Export Section**: Download options for results and models
192 | 
193 | ### Responsive Design
194 | - Mobile-friendly layout
195 | - Collapsible sidebar
196 | - Progressive disclosure of advanced options
197 | - Real-time feedback and progress indicators
198 | 
199 | ## 🚀 Deployment Options
200 | 
201 | ### Local Development
202 | ```bash
203 | streamlit run app_advanced.py --server.port 8501 --server.address 0.0.0.0
204 | ```
205 | 
206 | ### Docker Deployment
207 | ```dockerfile
208 | FROM python:3.11-slim
209 | COPY . /app
210 | WORKDIR /app
211 | RUN pip install -r requirements.txt
212 | EXPOSE 8501
213 | CMD ["streamlit", "run", "app_advanced.py", "--server.address", "0.0.0.0"]
214 | ```
215 | 
216 | ### Cloud Platforms
217 | - **Streamlit Cloud**: Direct deployment from GitHub
218 | - **Heroku**: Container-based deployment
219 | - **AWS/GCP/Azure**: Scalable cloud deployment
220 | - **Docker Hub**: Containerized distribution
221 | 
222 | ## 🤝 Contributing
223 | 
224 | We welcome contributions! Please follow these steps:
225 | 
226 | 1. **Fork** the repository
227 | 2. **Create** a feature branch (`git checkout -b feature/AmazingFeature`)
228 | 3. **Commit** changes (`git commit -m 'Add AmazingFeature'`)
229 | 4. **Push** to branch (`git push origin feature/AmazingFeature`)
230 | 5. **Open** a Pull Request
231 | 
232 | ### Development Guidelines
233 | - Follow PEP 8 style guidelines
234 | - Add docstrings to all functions
235 | - Include unit tests for new features
236 | - Update documentation for API changes
237 | - Ensure backward compatibility
238 | 
239 | ## 📄 License
240 | 
241 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
242 | 
243 | ## 🙏 Acknowledgments
244 | 
245 | - **Streamlit** for the amazing web app framework
246 | - **TensorFlow/Keras** for deep learning capabilities
247 | - **scikit-learn** for comprehensive ML algorithms
248 | - **Plotly** for interactive visualizations
249 | - **SHAP** for model explainability
250 | - **statsmodels** for statistical modeling
251 | 
252 | ## 🔮 Future Roadmap
253 | 
254 | ### Phase 1 (Completed)
255 | - ✅ 13 forecasting models implementation
256 | - ✅ Comprehensive evaluation metrics
257 | - ✅ Interactive visualizations
258 | - ✅ Professional UI/UX
259 | 
260 | ### Phase 2 (In Progress)
261 | - 🔄 Prophet model integration
262 | - 🔄 Automated model selection
263 | - 🔄 Prediction intervals
264 | - 🔄 Multi-step forecasting
265 | 
266 | ### Phase 3 (Planned)
267 | - 📋 Real-time forecasting dashboard
268 | - 📋 Anomaly detection system
269 | - 📋 Model deployment API
270 | - 📋 Database integration
271 | - 📋 Performance monitoring
272 | 
273 | ## 📞 Support
274 | 
275 | For questions, issues, or contributions:
276 | 
277 | - **GitHub Issues**: Bug reports and feature requests
278 | - **Discussions**: General questions and community support
279 | - **Pull Requests**: Code contributions welcome
280 | 
281 | ## 🎉 Getting Started
282 | 
283 | Ready to forecast? Get started in minutes:
284 | 
285 | 1. Clone the repo
286 | 2. Install dependencies
287 | 3. Run `streamlit run app_advanced.py`
288 | 4. Upload your data and start forecasting!
289 | 
290 | ---
291 | 
292 | **Built with ❤️ for the data science community**
293 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import pandas as pd
  3 | import numpy as np
  4 | from data_loader import load_data, preprocess_data, create_sample_data
  5 | from models import train_xgboost, train_random_forest, train_lstm, tune_hyperparameters
  6 | from advanced_models import (train_gradient_boosting, train_ada_boost, train_svr, train_mlp,
  7 |                              train_bidirectional_lstm, train_gru, train_arima, train_sarima,
  8 |                              ensemble_forecast)
  9 | from evaluation import evaluate_model
 10 | from advanced_evaluation import comprehensive_evaluation, print_evaluation_report
 11 | from viz import plot_forecast, plot_residuals, plot_distribution
 12 | from advanced_viz import (plot_forecast_interactive, plot_residuals_analysis, plot_model_comparison,
 13 |                           create_dashboard, plot_seasonal_decomposition)
 14 | from config import DEFAULT_PARAMS, TUNING_GRIDS, DATA_SETTINGS, APP_SETTINGS
 15 | from utils import (validate_data, scale_features, save_model, load_model,
 16 |                    handle_missing_values, create_time_features, detect_outliers)
 17 | import matplotlib.pyplot as plt
 18 | import plotly.graph_objects as go
 19 | 
 20 | # Set page configuration
 21 | st.set_page_config(
 22 |     page_title="Advanced Time Series Forecasting",
 23 |     page_icon="📈",
 24 |     layout="wide",
 25 |     initial_sidebar_state="expanded"
 26 | )
 27 | 
 28 | # Custom CSS for better styling
 29 | st.markdown("""
 30 | <style>
 31 |     .main-header {
 32 |         font-size: 2.5rem;
 33 |         font-weight: bold;
 34 |         color: #1f77b4;
 35 |         text-align: center;
 36 |         margin-bottom: 2rem;
 37 |         padding: 1rem;
 38 |         background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
 39 |         -webkit-background-clip: text;
 40 |         -webkit-text-fill-color: transparent;
 41 |         background-clip: text;
 42 |     }
 43 |     .sidebar-header {
 44 |         font-size: 1.2rem;
 45 |         font-weight: bold;
 46 |         color: #2c3e50;
 47 |         margin-bottom: 1rem;
 48 |     }
 49 |     .metric-card {
 50 |         background-color: #f8f9fa;
 51 |         padding: 1rem;
 52 |         border-radius: 0.5rem;
 53 |         border-left: 4px solid #1f77b4;
 54 |         margin-bottom: 1rem;
 55 |     }
 56 |     .metric-value {
 57 |         font-size: 1.5rem;
 58 |         font-weight: bold;
 59 |         color: #1f77b4;
 60 |     }
 61 |     .metric-label {
 62 |         font-size: 0.9rem;
 63 |         color: #6c757d;
 64 |         text-transform: uppercase;
 65 |     }
 66 |     .section-header {
 67 |         font-size: 1.5rem;
 68 |         font-weight: bold;
 69 |         color: #2c3e50;
 70 |         margin-top: 2rem;
 71 |         margin-bottom: 1rem;
 72 |         padding-bottom: 0.5rem;
 73 |         border-bottom: 2px solid #e9ecef;
 74 |     }
 75 |     .stButton>button {
 76 |         background-color: #1f77b4;
 77 |         color: white;
 78 |         border: none;
 79 |         border-radius: 0.3rem;
 80 |         padding: 0.5rem 1rem;
 81 |         font-weight: bold;
 82 |     }
 83 |     .stButton>button:hover {
 84 |         background-color: #155a8a;
 85 |         color: white;
 86 |     }
 87 |     .stCheckbox>label {
 88 |         font-weight: 500;
 89 |     }
 90 |     .stSelectbox>label, .stSlider>label, .stFileUploader>label {
 91 |         font-weight: 600;
 92 |         color: #2c3e50;
 93 |     }
 94 | 
 95 |     /* Force sidebar to be light theme with comprehensive text visibility */
 96 |     [data-testid="stSidebar"] {
 97 |         background-color: #ffffff !important;
 98 |         color: #2c3e50 !important;
 99 |     }
100 | 
101 |     /* Target all possible text elements in sidebar */
102 |     [data-testid="stSidebar"] *,
103 |     [data-testid="stSidebar"] p,
104 |     [data-testid="stSidebar"] span,
105 |     [data-testid="stSidebar"] div,
106 |     [data-testid="stSidebar"] label,
107 |     [data-testid="stSidebar"] h1,
108 |     [data-testid="stSidebar"] h2,
109 |     [data-testid="stSidebar"] h3,
110 |     [data-testid="stSidebar"] h4,
111 |     [data-testid="stSidebar"] h5,
112 |     [data-testid="stSidebar"] h6,
113 |     [data-testid="stSidebar"] strong,
114 |     [data-testid="stSidebar"] b,
115 |     [data-testid="stSidebar"] em,
116 |     [data-testid="stSidebar"] i {
117 |         color: #2c3e50 !important;
118 |     }
119 | 
120 |     /* Specific Streamlit component overrides */
121 |     [data-testid="stSidebar"] .stMarkdown {
122 |         color: #2c3e50 !important;
123 |     }
124 | 
125 |     [data-testid="stSidebar"] .stMarkdown p {
126 |         color: #2c3e50 !important;
127 |     }
128 | 
129 |     [data-testid="stSidebar"] .stSuccess {
130 |         color: #28a745 !important;
131 |     }
132 | 
133 |     [data-testid="stSidebar"] .stSuccess p {
134 |         color: #28a745 !important;
135 |     }
136 | 
137 |     [data-testid="stSidebar"] .stInfo {
138 |         color: #17a2b8 !important;
139 |     }
140 | 
141 |     [data-testid="stSidebar"] .stInfo p {
142 |         color: #17a2b8 !important;
143 |     }
144 | 
145 |     [data-testid="stSidebar"] .stWarning {
146 |         color: #ffc107 !important;
147 |     }
148 | 
149 |     [data-testid="stSidebar"] .stWarning p {
150 |         color: #ffc107 !important;
151 |     }
152 | 
153 |     [data-testid="stSidebar"] .stError {
154 |         color: #dc3545 !important;
155 |     }
156 | 
157 |     [data-testid="stSidebar"] .stError p {
158 |         color: #dc3545 !important;
159 |     }
160 | 
161 |     /* Form elements */
162 |     [data-testid="stSidebar"] input,
163 |     [data-testid="stSidebar"] select,
164 |     [data-testid="stSidebar"] textarea,
165 |     [data-testid="stSidebar"] button {
166 |         color: #2c3e50 !important;
167 |     }
168 | 
169 |     /* Slider and select labels */
170 |     [data-testid="stSidebar"] .stSlider label,
171 |     [data-testid="stSidebar"] .stSelectbox label,
172 |     [data-testid="stSidebar"] .stFileUploader label {
173 |         color: #2c3e50 !important;
174 |         font-weight: 600 !important;
175 |     }
176 | 
177 |     /* Fallback for older Streamlit versions */
178 |     .css-1d391kg, .css-12oz5g7 {
179 |         background-color: #ffffff !important;
180 |         color: #2c3e50 !important;
181 |     }
182 | 
183 |     .css-1d391kg *, .css-12oz5g7 * {
184 |         color: #2c3e50 !important;
185 |     }
186 | 
187 |     .css-1d391kg p, .css-12oz5g7 p,
188 |     .css-1d391kg span, .css-12oz5g7 span,
189 |     .css-1d391kg div, .css-12oz5g7 div {
190 |         color: #2c3e50 !important;
191 |     }
192 | </style>
193 | """, unsafe_allow_html=True)
194 | 
195 | # Main header with custom styling
196 | st.markdown('<h1 class="main-header">📈 Advanced Time Series Forecasting Tool</h1>', unsafe_allow_html=True)
197 | st.markdown('<p style="text-align: center; font-size: 1.1rem; color: #6c757d; margin-bottom: 2rem;">Professional time series analysis with 11+ forecasting models and interactive visualizations</p>', unsafe_allow_html=True)
198 | 
199 | # Sidebar for inputs
200 | st.sidebar.markdown('<p class="sidebar-header">📁 Data Upload</p>', unsafe_allow_html=True)
201 | uploaded_file = st.sidebar.file_uploader('Upload CSV file', type=['csv'])
202 | if uploaded_file is not None:
203 |     df = load_data(uploaded_file)
204 |     st.sidebar.success('✅ Data loaded successfully!')
205 | else:
206 |     st.sidebar.info('ℹ️ Using sample data.')
207 |     df = create_sample_data()
208 | 
209 | st.sidebar.markdown('<p class="sidebar-header">⚙️ Preprocessing</p>', unsafe_allow_html=True)
210 | lags = st.sidebar.slider('Number of lag features', 1, 10, 5)
211 | test_size = st.sidebar.slider('Test size', 0.1, 0.5, 0.2)
212 | 
213 | X_train, X_test, y_train, y_test = preprocess_data(df, lags=lags, test_size=test_size)
214 | 
215 | st.sidebar.markdown('<p class="sidebar-header">🤖 Model Selection</p>', unsafe_allow_html=True)
216 | model_options = ['XGBoost', 'Random Forest', 'LSTM', 'Gradient Boosting', 'AdaBoost', 'SVR', 'MLP', 'Bidirectional LSTM', 'GRU', 'ARIMA', 'SARIMA']
217 | model_choice = st.sidebar.selectbox('Choose model', model_options)
218 | 
219 | st.sidebar.markdown('<p class="sidebar-header">🔧 Hyperparameters</p>', unsafe_allow_html=True)
220 | if model_choice in DEFAULT_PARAMS:
221 |     params = DEFAULT_PARAMS[model_choice].copy()
222 |     for param_name, default_value in params.items():
223 |         if isinstance(default_value, int):
224 |             params[param_name] = st.sidebar.slider(param_name, 1, 200, default_value)
225 |         elif isinstance(default_value, float):
226 |             params[param_name] = st.sidebar.slider(param_name, 0.001, 1.0, default_value)
227 | else:
228 |     st.sidebar.warning(f"Default parameters not set for {model_choice}. Using basic settings.")
229 |     params = {}
230 | 
231 | tune = st.sidebar.checkbox('Tune hyperparameters?')
232 | if tune and model_choice in TUNING_GRIDS:
233 |     param_grid = TUNING_GRIDS[model_choice]
234 |     model, best_params = tune_hyperparameters(model_choice.lower().replace(' ', '_'), X_train, y_train, param_grid)
235 |     st.sidebar.write('Best params:', best_params)
236 | else:
237 |     if model_choice == 'XGBoost':
238 |         model = train_xgboost(X_train, y_train, params)
239 |     elif model_choice == 'Random Forest':
240 |         model = train_random_forest(X_train, y_train, params)
241 |     elif model_choice == 'LSTM':
242 |         model = train_lstm(X_train, y_train, params)
243 |     elif model_choice == 'Gradient Boosting':
244 |         model = train_gradient_boosting(X_train, y_train, params)
245 |     elif model_choice == 'AdaBoost':
246 |         model = train_ada_boost(X_train, y_train, params)
247 |     elif model_choice == 'SVR':
248 |         model = train_svr(X_train, y_train, params)
249 |     elif model_choice == 'MLP':
250 |         model = train_mlp(X_train, y_train, params)
251 |     elif model_choice == 'Bidirectional LSTM':
252 |         model = train_bidirectional_lstm(X_train, y_train, params)
253 |     elif model_choice == 'GRU':
254 |         model = train_gru(X_train, y_train, params)
255 |     elif model_choice == 'ARIMA':
256 |         model = train_arima(y_train)
257 |     elif model_choice == 'SARIMA':
258 |         model = train_sarima(y_train)
259 |     else:
260 |         st.error(f"Model {model_choice} not implemented yet.")
261 |         st.stop()
262 | 
263 | # Train and predict
264 | if model_choice in ['LSTM', 'Bidirectional LSTM', 'GRU']:
265 |     X_test_reshaped = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))
266 |     y_pred = model.predict(X_test_reshaped).flatten()
267 | elif model_choice in ['ARIMA', 'SARIMA']:
268 |     # For ARIMA/SARIMA, forecast the test period
269 |     y_pred = model.forecast(steps=len(y_test))
270 |     y_pred.index = y_test.index
271 | else:
272 |     y_pred = model.predict(X_test)
273 | 
274 | # Evaluate
275 | metrics = evaluate_model(y_test, y_pred)
276 | 
277 | # Display metrics in styled cards
278 | st.markdown('<h2 class="section-header">📊 Model Evaluation</h2>', unsafe_allow_html=True)
279 | 
280 | col1, col2, col3, col4 = st.columns(4)
281 | with col1:
282 |     st.markdown(f"""
283 |     <div class="metric-card">
284 |         <div class="metric-label">MAE</div>
285 |         <div class="metric-value">{metrics['MAE']:.4f}</div>
286 |     </div>
287 |     """, unsafe_allow_html=True)
288 | 
289 | with col2:
290 |     st.markdown(f"""
291 |     <div class="metric-card">
292 |         <div class="metric-label">RMSE</div>
293 |         <div class="metric-value">{metrics['RMSE']:.4f}</div>
294 |     </div>
295 |     """, unsafe_allow_html=True)
296 | 
297 | with col3:
298 |     st.markdown(f"""
299 |     <div class="metric-card">
300 |         <div class="metric-label">MAPE</div>
301 |         <div class="metric-value">{metrics['MAPE']:.2f}%</div>
302 |     </div>
303 |     """, unsafe_allow_html=True)
304 | 
305 | with col4:
306 |     st.markdown(f"""
307 |     <div class="metric-card">
308 |         <div class="metric-label">R² Score</div>
309 |         <div class="metric-value">{metrics['R2']:.4f}</div>
310 |     </div>
311 |     """, unsafe_allow_html=True)
312 | 
313 | # Advanced evaluation option
314 | if st.checkbox('🔬 Show Advanced Metrics'):
315 |     adv_metrics = comprehensive_evaluation(y_test, y_pred, y_train)
316 |     st.markdown('<h3 class="section-header">Advanced Performance Metrics</h3>', unsafe_allow_html=True)
317 |     for key, value in adv_metrics.items():
318 |         if isinstance(value, dict):
319 |             st.markdown(f"**{key}:**")
320 |             for sub_key, sub_value in value.items():
321 |                 st.write(f"  - {sub_key}: {sub_value}")
322 |         else:
323 |             st.write(f"**{key}:** {value}")
324 | 
325 | # Visualize
326 | st.markdown('<h2 class="section-header">📈 Visualizations</h2>', unsafe_allow_html=True)
327 | 
328 | # Interactive Plot
329 | if st.checkbox('📊 Show Interactive Plot'):
330 |     interactive_fig = plot_forecast_interactive(y_test, y_pred)
331 |     st.plotly_chart(interactive_fig)
332 | 
333 | # Static plots
334 | st.markdown('<h3 class="section-header">Basic Analysis</h3>', unsafe_allow_html=True)
335 | col1, col2 = st.columns(2)
336 | 
337 | with col1:
338 |     fig1, ax1 = plt.subplots(figsize=(8, 5))
339 |     ax1.plot(y_test.index, y_test, label='Observed', color='blue', linewidth=2)
340 |     ax1.plot(y_test.index, y_pred, label='Predicted', color='red', linewidth=2, linestyle='--')
341 |     ax1.set_xlabel('Date', fontsize=12)
342 |     ax1.set_ylabel('Value', fontsize=12)
343 |     ax1.set_title('Forecast vs Actual', fontsize=14, fontweight='bold')
344 |     ax1.legend()
345 |     ax1.grid(True, alpha=0.3)
346 |     st.pyplot(fig1)
347 | 
348 | with col2:
349 |     residuals = y_test - y_pred
350 |     fig2, ax2 = plt.subplots(figsize=(8, 5))
351 |     ax2.plot(residuals.index, residuals, color='green', linewidth=2)
352 |     ax2.axhline(0, color='black', linestyle='--', linewidth=1)
353 |     ax2.set_xlabel('Date', fontsize=12)
354 |     ax2.set_ylabel('Residuals', fontsize=12)
355 |     ax2.set_title('Residuals Over Time', fontsize=14, fontweight='bold')
356 |     ax2.grid(True, alpha=0.3)
357 |     st.pyplot(fig2)
358 | 
359 | # Residuals distribution
360 | st.markdown('<h3 class="section-header">Residuals Distribution</h3>', unsafe_allow_html=True)
361 | fig3, ax3 = plt.subplots(figsize=(10, 6))
362 | ax3.hist(residuals, bins=20, alpha=0.7, color='purple', edgecolor='black')
363 | ax3.set_title('Residuals Distribution', fontsize=14, fontweight='bold')
364 | ax3.set_xlabel('Residual Value', fontsize=12)
365 | ax3.set_ylabel('Frequency', fontsize=12)
366 | ax3.grid(True, alpha=0.3)
367 | st.pyplot(fig3)
368 | 
369 | # Advanced visualizations
370 | if st.checkbox('🔬 Show Advanced Visualizations'):
371 |     st.markdown('<h3 class="section-header">Comprehensive Residuals Analysis</h3>', unsafe_allow_html=True)
372 |     res_fig = plot_residuals_analysis(y_test, y_pred)
373 |     st.pyplot(res_fig)
374 | 
375 |     st.markdown('<h3 class="section-header">Seasonal Decomposition</h3>', unsafe_allow_html=True)
376 |     try:
377 |         seasonal_fig = plot_seasonal_decomposition(df[DATA_SETTINGS['target_col']])
378 |         st.pyplot(seasonal_fig)
379 |     except:
380 |         st.warning("⚠️ Seasonal decomposition requires more data points (minimum 24 observations recommended).")
381 | 


--------------------------------------------------------------------------------
/app_advanced.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import pandas as pd
  3 | import numpy as np
  4 | from data_loader import load_data, preprocess_data, create_sample_data
  5 | from models import train_xgboost, train_random_forest, train_lstm, tune_hyperparameters
  6 | from advanced_models import (train_gradient_boosting, train_ada_boost, train_svr, train_mlp,
  7 |                              train_bidirectional_lstm, train_gru, train_arima, train_sarima,
  8 |                              ensemble_forecast)
  9 | from evaluation import evaluate_model
 10 | from advanced_evaluation import comprehensive_evaluation, print_evaluation_report
 11 | from viz import plot_forecast, plot_residuals, plot_distribution
 12 | from advanced_viz import (plot_forecast_interactive, plot_residuals_analysis, plot_model_comparison,
 13 |                           create_dashboard, plot_seasonal_decomposition)
 14 | from config import DEFAULT_PARAMS, TUNING_GRIDS, DATA_SETTINGS, APP_SETTINGS
 15 | from utils import (validate_data, scale_features, save_model, load_model,
 16 |                    handle_missing_values, create_time_features, detect_outliers)
 17 | import matplotlib.pyplot as plt
 18 | import plotly.graph_objects as go
 19 | import warnings
 20 | warnings.filterwarnings('ignore')
 21 | 
 22 | # Import additional libraries for advanced features
 23 | try:
 24 |     import shap
 25 |     SHAP_AVAILABLE = True
 26 | except ImportError:
 27 |     SHAP_AVAILABLE = False
 28 | 
 29 | try:
 30 |     from sklearn.ensemble import VotingRegressor
 31 |     from sklearn.model_selection import TimeSeriesSplit
 32 |     VOTING_AVAILABLE = True
 33 | except ImportError:
 34 |     VOTING_AVAILABLE = False
 35 | 
 36 | # Set page configuration
 37 | st.set_page_config(
 38 |     page_title="Advanced Time Series Forecasting Pro",
 39 |     page_icon="🚀",
 40 |     layout="wide",
 41 |     initial_sidebar_state="expanded"
 42 | )
 43 | 
 44 | # Custom CSS for better styling
 45 | st.markdown("""
 46 | <style>
 47 |     .main-header {
 48 |         font-size: 2.5rem;
 49 |         font-weight: bold;
 50 |         color: #1f77b4;
 51 |         text-align: center;
 52 |         margin-bottom: 2rem;
 53 |         padding: 1rem;
 54 |         background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
 55 |         -webkit-background-clip: text;
 56 |         -webkit-text-fill-color: transparent;
 57 |         background-clip: text;
 58 |     }
 59 |     .sidebar-header {
 60 |         font-size: 1.2rem;
 61 |         font-weight: bold;
 62 |         color: #2c3e50;
 63 |         margin-bottom: 1rem;
 64 |     }
 65 |     .metric-card {
 66 |         background-color: #f8f9fa;
 67 |         padding: 1rem;
 68 |         border-radius: 0.5rem;
 69 |         border-left: 4px solid #1f77b4;
 70 |         margin-bottom: 1rem;
 71 |     }
 72 |     .metric-value {
 73 |         font-size: 1.5rem;
 74 |         font-weight: bold;
 75 |         color: #1f77b4;
 76 |     }
 77 |     .metric-label {
 78 |         font-size: 0.9rem;
 79 |         color: #6c757d;
 80 |         text-transform: uppercase;
 81 |     }
 82 |     .section-header {
 83 |         font-size: 1.5rem;
 84 |         font-weight: bold;
 85 |         color: #2c3e50;
 86 |         margin-top: 2rem;
 87 |         margin-bottom: 1rem;
 88 |         padding-bottom: 0.5rem;
 89 |         border-bottom: 2px solid #e9ecef;
 90 |     }
 91 |     .stButton>button {
 92 |         background-color: #1f77b4;
 93 |         color: white;
 94 |         border: none;
 95 |         border-radius: 0.3rem;
 96 |         padding: 0.5rem 1rem;
 97 |         font-weight: bold;
 98 |     }
 99 |     .stButton>button:hover {
100 |         background-color: #155a8a;
101 |         color: white;
102 |     }
103 |     .stCheckbox>label {
104 |         font-weight: 500;
105 |     }
106 |     .stSelectbox>label, .stSlider>label, .stFileUploader>label {
107 |         font-weight: 600;
108 |         color: #2c3e50;
109 |     }
110 | 
111 |     /* Force sidebar to be light theme with comprehensive text visibility */
112 |     [data-testid="stSidebar"] {
113 |         background-color: #ffffff !important;
114 |         color: #2c3e50 !important;
115 |     }
116 | 
117 |     /* Target all possible text elements in sidebar */
118 |     [data-testid="stSidebar"] *,
119 |     [data-testid="stSidebar"] p,
120 |     [data-testid="stSidebar"] span,
121 |     [data-testid="stSidebar"] div,
122 |     [data-testid="stSidebar"] label,
123 |     [data-testid="stSidebar"] h1,
124 |     [data-testid="stSidebar"] h2,
125 |     [data-testid="stSidebar"] h3,
126 |     [data-testid="stSidebar"] h4,
127 |     [data-testid="stSidebar"] h5,
128 |     [data-testid="stSidebar"] h6,
129 |     [data-testid="stSidebar"] strong,
130 |     [data-testid="stSidebar"] b,
131 |     [data-testid="stSidebar"] em,
132 |     [data-testid="stSidebar"] i {
133 |         color: #2c3e50 !important;
134 |     }
135 | 
136 |     /* Specific Streamlit component overrides */
137 |     [data-testid="stSidebar"] .stMarkdown {
138 |         color: #2c3e50 !important;
139 |     }
140 | 
141 |     [data-testid="stSidebar"] .stMarkdown p {
142 |         color: #2c3e50 !important;
143 |     }
144 | 
145 |     [data-testid="stSidebar"] .stSuccess {
146 |         color: #28a745 !important;
147 |     }
148 | 
149 |     [data-testid="stSidebar"] .stSuccess p {
150 |         color: #28a745 !important;
151 |     }
152 | 
153 |     [data-testid="stSidebar"] .stInfo {
154 |         color: #17a2b8 !important;
155 |     }
156 | 
157 |     [data-testid="stSidebar"] .stInfo p {
158 |         color: #17a2b8 !important;
159 |     }
160 | 
161 |     [data-testid="stSidebar"] .stWarning {
162 |         color: #ffc107 !important;
163 |     }
164 | 
165 |     [data-testid="stSidebar"] .stWarning p {
166 |         color: #ffc107 !important;
167 |     }
168 | 
169 |     [data-testid="stSidebar"] .stError {
170 |         color: #dc3545 !important;
171 |     }
172 | 
173 |     [data-testid="stSidebar"] .stError p {
174 |         color: #dc3545 !important;
175 |     }
176 | 
177 |     /* Form elements */
178 |     [data-testid="stSidebar"] input,
179 |     [data-testid="stSidebar"] select,
180 |     [data-testid="stSidebar"] textarea,
181 |     [data-testid="stSidebar"] button {
182 |         color: #2c3e50 !important;
183 |     }
184 | 
185 |     /* Slider and select labels */
186 |     [data-testid="stSidebar"] .stSlider label,
187 |     [data-testid="stSidebar"] .stSelectbox label,
188 |     [data-testid="stSidebar"] .stFileUploader label {
189 |         color: #2c3e50 !important;
190 |         font-weight: 600 !important;
191 |     }
192 | 
193 |     /* Fallback for older Streamlit versions */
194 |     .css-1d391kg, .css-12oz5g7 {
195 |         background-color: #ffffff !important;
196 |         color: #2c3e50 !important;
197 |     }
198 | 
199 |     .css-1d391kg *, .css-12oz5g7 * {
200 |         color: #2c3e50 !important;
201 |     }
202 | 
203 |     .css-1d391kg p, .css-12oz5g7 p,
204 |     .css-1d391kg span, .css-12oz5g7 span,
205 |     .css-1d391kg div, .css-12oz5g7 div {
206 |         color: #2c3e50 !important;
207 |     }
208 | </style>
209 | """, unsafe_allow_html=True)
210 | 
211 | # Main header with custom styling
212 | st.markdown('<h1 class="main-header">🚀 Advanced Time Series Forecasting Pro</h1>', unsafe_allow_html=True)
213 | st.markdown('<p style="text-align: center; font-size: 1.1rem; color: #6c757d; margin-bottom: 2rem;">Professional time series analysis with 11+ forecasting models, ensemble methods, and AI-powered insights</p>', unsafe_allow_html=True)
214 | 
215 | # Initialize session state for advanced features
216 | if 'models_trained' not in st.session_state:
217 |     st.session_state.models_trained = {}
218 | if 'ensemble_models' not in st.session_state:
219 |     st.session_state.ensemble_models = {}
220 | if 'predictions' not in st.session_state:
221 |     st.session_state.predictions = {}
222 | 
223 | # Sidebar for inputs
224 | st.sidebar.markdown('<p class="sidebar-header">📁 Data Upload</p>', unsafe_allow_html=True)
225 | uploaded_file = st.sidebar.file_uploader('Upload CSV file', type=['csv'])
226 | if uploaded_file is not None:
227 |     df = load_data(uploaded_file)
228 |     st.sidebar.success('✅ Data loaded successfully!')
229 | else:
230 |     st.sidebar.info('ℹ️ Using sample data.')
231 |     df = create_sample_data()
232 | 
233 | st.sidebar.markdown('<p class="sidebar-header">⚙️ Preprocessing</p>', unsafe_allow_html=True)
234 | lags = st.sidebar.slider('Number of lag features', 1, 20, 5)
235 | test_size = st.sidebar.slider('Test size', 0.1, 0.5, 0.2)
236 | scaling_method = st.sidebar.selectbox('Feature scaling', ['none', 'standard', 'minmax'], index=0)
237 | 
238 | X_train, X_test, y_train, y_test = preprocess_data(df, lags=lags, test_size=test_size)
239 | 
240 | # Apply scaling if selected
241 | if scaling_method != 'none':
242 |     X_train_scaled, X_test_scaled, scaler = scale_features(X_train, X_test, method=scaling_method)
243 |     X_train, X_test = X_train_scaled, X_test_scaled
244 | 
245 | st.sidebar.markdown('<p class="sidebar-header">🤖 Model Selection</p>', unsafe_allow_html=True)
246 | model_options = ['XGBoost', 'Random Forest', 'LSTM', 'Gradient Boosting', 'AdaBoost', 'SVR', 'MLP',
247 |                  'Bidirectional LSTM', 'GRU', 'ARIMA', 'SARIMA', 'Ensemble (Voting)', 'Ensemble (Stacking)']
248 | model_choice = st.sidebar.selectbox('Choose model', model_options)
249 | 
250 | # Advanced features
251 | st.sidebar.markdown('<p class="sidebar-header">🔬 Advanced Features</p>', unsafe_allow_html=True)
252 | enable_shap = st.sidebar.checkbox('Enable SHAP analysis', value=False, disabled=not SHAP_AVAILABLE)
253 | enable_cross_validation = st.sidebar.checkbox('Time series cross-validation', value=False)
254 | enable_feature_importance = st.sidebar.checkbox('Feature importance analysis', value=False)
255 | 
256 | st.sidebar.markdown('<p class="sidebar-header">🔧 Hyperparameters</p>', unsafe_allow_html=True)
257 | if model_choice in DEFAULT_PARAMS:
258 |     params = DEFAULT_PARAMS[model_choice].copy()
259 |     for param_name, default_value in params.items():
260 |         if isinstance(default_value, int):
261 |             params[param_name] = st.sidebar.slider(param_name, 1, 200, default_value)
262 |         elif isinstance(default_value, float):
263 |             params[param_name] = st.sidebar.slider(param_name, 0.001, 1.0, default_value)
264 | else:
265 |     st.sidebar.warning(f"Default parameters not set for {model_choice}. Using basic settings.")
266 |     params = {}
267 | 
268 | tune = st.sidebar.checkbox('Tune hyperparameters?')
269 | if tune and model_choice in TUNING_GRIDS:
270 |     param_grid = TUNING_GRIDS[model_choice]
271 |     model, best_params = tune_hyperparameters(model_choice.lower().replace(' ', '_'), X_train, y_train, param_grid)
272 |     st.sidebar.write('Best params:', best_params)
273 | else:
274 |     # Train individual models
275 |     if model_choice == 'XGBoost':
276 |         model = train_xgboost(X_train, y_train, params)
277 |     elif model_choice == 'Random Forest':
278 |         model = train_random_forest(X_train, y_train, params)
279 |     elif model_choice == 'LSTM':
280 |         model = train_lstm(X_train, y_train, params)
281 |     elif model_choice == 'Gradient Boosting':
282 |         model = train_gradient_boosting(X_train, y_train, params)
283 |     elif model_choice == 'AdaBoost':
284 |         model = train_ada_boost(X_train, y_train, params)
285 |     elif model_choice == 'SVR':
286 |         model = train_svr(X_train, y_train, params)
287 |     elif model_choice == 'MLP':
288 |         model = train_mlp(X_train, y_train, params)
289 |     elif model_choice == 'Bidirectional LSTM':
290 |         model = train_bidirectional_lstm(X_train, y_train, params)
291 |     elif model_choice == 'GRU':
292 |         model = train_gru(X_train, y_train, params)
293 |     elif model_choice == 'ARIMA':
294 |         model = train_arima(y_train)
295 |     elif model_choice == 'SARIMA':
296 |         model = train_sarima(y_train)
297 |     elif model_choice == 'Ensemble (Voting)':
298 |         if VOTING_AVAILABLE:
299 |             # Train multiple models for ensemble
300 |             models = []
301 |             model_names = ['XGBoost', 'Random Forest', 'Gradient Boosting']
302 | 
303 |             for name in model_names:
304 |                 if name == 'XGBoost':
305 |                     m = train_xgboost(X_train, y_train, DEFAULT_PARAMS.get('XGBoost', {}))
306 |                 elif name == 'Random Forest':
307 |                     m = train_random_forest(X_train, y_train, DEFAULT_PARAMS.get('Random Forest', {}))
308 |                 elif name == 'Gradient Boosting':
309 |                     m = train_gradient_boosting(X_train, y_train, DEFAULT_PARAMS.get('Gradient Boosting', {}))
310 |                 models.append((name, m))
311 | 
312 |             model = VotingRegressor(estimators=models)
313 |             model.fit(X_train, y_train)
314 |         else:
315 |             st.error("VotingRegressor not available. Install scikit-learn.")
316 |             st.stop()
317 |     elif model_choice == 'Ensemble (Stacking)':
318 |         # Implement stacking ensemble
319 |         base_models = []
320 |         model_names = ['XGBoost', 'Random Forest', 'Gradient Boosting']
321 | 
322 |         for name in model_names:
323 |             if name == 'XGBoost':
324 |                 m = train_xgboost(X_train, y_train, DEFAULT_PARAMS.get('XGBoost', {}))
325 |             elif name == 'Random Forest':
326 |                 m = train_random_forest(X_train, y_train, DEFAULT_PARAMS.get('Random Forest', {}))
327 |             elif name == 'Gradient Boosting':
328 |                 m = train_gradient_boosting(X_train, y_train, DEFAULT_PARAMS.get('Gradient Boosting', {}))
329 |             base_models.append(m)
330 | 
331 |         # Use XGBoost as meta model
332 |         meta_model = train_xgboost(X_train, y_train, DEFAULT_PARAMS.get('XGBoost', {}))
333 | 
334 |         # Simple stacking implementation
335 |         base_predictions = np.column_stack([m.predict(X_train) for m in base_models])
336 |         meta_model.fit(base_predictions, y_train)
337 |         model = {'base_models': base_models, 'meta_model': meta_model}
338 |     else:
339 |         st.error(f"Model {model_choice} not implemented yet.")
340 |         st.stop()
341 | 
342 | # Train and predict
343 | if model_choice in ['LSTM', 'Bidirectional LSTM', 'GRU']:
344 |     X_test_reshaped = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))
345 |     y_pred = model.predict(X_test_reshaped).flatten()
346 | elif model_choice in ['ARIMA', 'SARIMA']:
347 |     # For ARIMA/SARIMA, forecast the test period
348 |     y_pred = model.forecast(steps=len(y_test))
349 |     y_pred.index = y_test.index
350 | elif model_choice == 'Ensemble (Stacking)':
351 |     # Stacking prediction
352 |     base_predictions = np.column_stack([m.predict(X_test) for m in model['base_models']])
353 |     y_pred = model['meta_model'].predict(base_predictions)
354 | else:
355 |     y_pred = model.predict(X_test)
356 | 
357 | # Evaluate
358 | metrics = evaluate_model(y_test, y_pred)
359 | 
360 | # Display metrics in styled cards
361 | st.markdown('<h2 class="section-header">📊 Model Evaluation</h2>', unsafe_allow_html=True)
362 | 
363 | col1, col2, col3, col4 = st.columns(4)
364 | with col1:
365 |     st.markdown(f"""
366 |     <div class="metric-card">
367 |         <div class="metric-label">MAE</div>
368 |         <div class="metric-value">{metrics['MAE']:.4f}</div>
369 |     </div>
370 |     """, unsafe_allow_html=True)
371 | 
372 | with col2:
373 |     st.markdown(f"""
374 |     <div class="metric-card">
375 |         <div class="metric-label">RMSE</div>
376 |         <div class="metric-value">{metrics['RMSE']:.4f}</div>
377 |     </div>
378 |     """, unsafe_allow_html=True)
379 | 
380 | with col3:
381 |     st.markdown(f"""
382 |     <div class="metric-card">
383 |         <div class="metric-label">MAPE</div>
384 |         <div class="metric-value">{metrics['MAPE']:.2f}%</div>
385 |     </div>
386 |     """, unsafe_allow_html=True)
387 | 
388 | with col4:
389 |     st.markdown(f"""
390 |     <div class="metric-card">
391 |         <div class="metric-label">R² Score</div>
392 |         <div class="metric-value">{metrics['R2']:.4f}</div>
393 |     </div>
394 |     """, unsafe_allow_html=True)
395 | 
396 | # Advanced evaluation option
397 | if st.checkbox('🔬 Show Advanced Metrics'):
398 |     adv_metrics = comprehensive_evaluation(y_test, y_pred, y_train)
399 |     st.markdown('<h3 class="section-header">Advanced Performance Metrics</h3>', unsafe_allow_html=True)
400 |     for key, value in adv_metrics.items():
401 |         if isinstance(value, dict):
402 |             st.markdown(f"**{key}:**")
403 |             for sub_key, sub_value in value.items():
404 |                 st.write(f"  - {sub_key}: {sub_value}")
405 |         else:
406 |             st.write(f"**{key}:** {value}")
407 | 
408 | # Visualize
409 | st.markdown('<h2 class="section-header">📈 Visualizations</h2>', unsafe_allow_html=True)
410 | 
411 | # Interactive Plot
412 | if st.checkbox('📊 Show Interactive Plot'):
413 |     interactive_fig = plot_forecast_interactive(y_test, y_pred)
414 |     st.plotly_chart(interactive_fig)
415 | 
416 | # Static plots
417 | st.markdown('<h3 class="section-header">Basic Analysis</h3>', unsafe_allow_html=True)
418 | col1, col2 = st.columns(2)
419 | 
420 | with col1:
421 |     fig1, ax1 = plt.subplots(figsize=(8, 5))
422 |     ax1.plot(y_test.index, y_test, label='Observed', color='blue', linewidth=2)
423 |     ax1.plot(y_test.index, y_pred, label='Predicted', color='red', linewidth=2, linestyle='--')
424 |     ax1.set_xlabel('Date', fontsize=12)
425 |     ax1.set_ylabel('Value', fontsize=12)
426 |     ax1.set_title('Forecast vs Actual', fontsize=14, fontweight='bold')
427 |     ax1.legend()
428 |     ax1.grid(True, alpha=0.3)
429 |     st.pyplot(fig1)
430 | 
431 | with col2:
432 |     residuals = y_test - y_pred
433 |     fig2, ax2 = plt.subplots(figsize=(8, 5))
434 |     ax2.plot(residuals.index, residuals, color='green', linewidth=2)
435 |     ax2.axhline(0, color='black', linestyle='--', linewidth=1)
436 |     ax2.set_xlabel('Date', fontsize=12)
437 |     ax2.set_ylabel('Residuals', fontsize=12)
438 |     ax2.set_title('Residuals Over Time', fontsize=14, fontweight='bold')
439 |     ax2.grid(True, alpha=0.3)
440 |     st.pyplot(fig2)
441 | 
442 | # Residuals distribution
443 | st.markdown('<h3 class="section-header">Residuals Distribution</h3>', unsafe_allow_html=True)
444 | fig3, ax3 = plt.subplots(figsize=(10, 6))
445 | ax3.hist(residuals, bins=20, alpha=0.7, color='purple', edgecolor='black')
446 | ax3.set_title('Residuals Distribution', fontsize=14, fontweight='bold')
447 | ax3.set_xlabel('Residual Value', fontsize=12)
448 | ax3.set_ylabel('Frequency', fontsize=12)
449 | ax3.grid(True, alpha=0.3)
450 | st.pyplot(fig3)
451 | 
452 | # Advanced visualizations
453 | if st.checkbox('🔬 Show Advanced Visualizations'):
454 |     st.markdown('<h3 class="section-header">Comprehensive Residuals Analysis</h3>', unsafe_allow_html=True)
455 |     res_fig = plot_residuals_analysis(y_test, y_pred)
456 |     st.pyplot(res_fig)
457 | 
458 |     st.markdown('<h3 class="section-header">Seasonal Decomposition</h3>', unsafe_allow_html=True)
459 |     try:
460 |         seasonal_fig = plot_seasonal_decomposition(df[DATA_SETTINGS['target_col']])
461 |         st.pyplot(seasonal_fig)
462 |     except:
463 |         st.warning("⚠️ Seasonal decomposition requires more data points (minimum 24 observations recommended).")
464 | 
465 | # Advanced Features Section
466 | st.markdown('<h2 class="section-header">🚀 Advanced AI Features</h2>', unsafe_allow_html=True)
467 | 
468 | # SHAP Analysis
469 | if enable_shap and SHAP_AVAILABLE and hasattr(model, 'predict'):
470 |     try:
471 |         st.markdown('<h3 class="section-header">SHAP Feature Importance</h3>', unsafe_allow_html=True)
472 | 
473 |         # Sample data for SHAP (use smaller sample for performance)
474 |         background = X_train.sample(min(100, len(X_train)), random_state=42)
475 |         test_sample = X_test.sample(min(50, len(X_test)), random_state=42)
476 | 
477 |         if model_choice in ['XGBoost', 'Random Forest', 'Gradient Boosting']:
478 |             explainer = shap.TreeExplainer(model)
479 |             shap_values = explainer.shap_values(test_sample)
480 | 
481 |             # Summary plot
482 |             fig, ax = plt.subplots(figsize=(10, 6))
483 |             shap.summary_plot(shap_values, test_sample, show=False)
484 |             st.pyplot(fig)
485 | 
486 |             # Waterfall plot for first prediction
487 |             st.markdown("**SHAP Waterfall Plot (First Prediction):**")
488 |             fig, ax = plt.subplots(figsize=(10, 6))
489 |             shap.plots.waterfall(explainer.expected_value, shap_values[0], test_sample.iloc[0], show=False)
490 |             st.pyplot(fig)
491 | 
492 |         else:
493 |             st.info("SHAP analysis is most effective for tree-based models (XGBoost, Random Forest, Gradient Boosting).")
494 | 
495 |     except Exception as e:
496 |         st.error(f"SHAP analysis failed: {str(e)}")
497 | 
498 | # Feature Importance Analysis
499 | if enable_feature_importance and hasattr(model, 'feature_importances_'):
500 |     st.markdown('<h3 class="section-header">Feature Importance Analysis</h3>', unsafe_allow_html=True)
501 | 
502 |     try:
503 |         importance = model.feature_importances_
504 |         feature_names = [f'lag_{i+1}' for i in range(len(importance))]
505 | 
506 |         fig, ax = plt.subplots(figsize=(10, 6))
507 |         indices = np.argsort(importance)[::-1][:min(20, len(importance))]
508 |         ax.bar(range(len(indices)), importance[indices], align='center')
509 |         ax.set_xticks(range(len(indices)))
510 |         ax.set_xticklabels([feature_names[i] for i in indices], rotation=45)
511 |         ax.set_title('Feature Importances')
512 |         ax.set_xlabel('Features')
513 |         ax.set_ylabel('Importance')
514 |         plt.tight_layout()
515 |         st.pyplot(fig)
516 | 
517 |     except Exception as e:
518 |         st.error(f"Feature importance analysis failed: {str(e)}")
519 | 
520 | # Cross-validation
521 | if enable_cross_validation:
522 |     st.markdown('<h3 class="section-header">Time Series Cross-Validation</h3>', unsafe_allow_html=True)
523 | 
524 |     try:
525 |         from sklearn.model_selection import cross_val_score
526 |         from sklearn.metrics import make_scorer, mean_absolute_error
527 | 
528 |         # Time series split
529 |         tscv = TimeSeriesSplit(n_splits=5)
530 | 
531 |         if model_choice not in ['ARIMA', 'SARIMA', 'Ensemble (Stacking)']:
532 |             cv_scores = cross_val_score(model, X_train, y_train, cv=tscv, scoring=make_scorer(mean_absolute_error))
533 | 
534 |             st.write(f"Cross-validation MAE scores: {cv_scores}")
535 |             st.write(f"Mean CV MAE: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")
536 | 
537 |             # Plot CV scores
538 |             fig, ax = plt.subplots(figsize=(8, 5))
539 |             ax.plot(range(1, len(cv_scores) + 1), cv_scores, 'o-', linewidth=2, markersize=8)
540 |             ax.set_title('Cross-Validation Scores')
541 |             ax.set_xlabel('Fold')
542 |             ax.set_ylabel('MAE')
543 |             ax.grid(True, alpha=0.3)
544 |             st.pyplot(fig)
545 |         else:
546 |             st.info("Cross-validation is not applicable for ARIMA/SARIMA or stacking models in this implementation.")
547 | 
548 |     except Exception as e:
549 |         st.error(f"Cross-validation failed: {str(e)}")
550 | 
551 | # Model Comparison Dashboard
552 | if st.checkbox('📊 Create Model Comparison Dashboard'):
553 |     st.markdown('<h3 class="section-header">Model Comparison Dashboard</h3>', unsafe_allow_html=True)
554 | 
555 |     try:
556 |         dashboard_fig = create_dashboard(y_test, y_pred, metrics, model_choice)
557 |         st.plotly_chart(dashboard_fig, use_container_width=True)
558 |     except Exception as e:
559 |         st.error(f"Dashboard creation failed: {str(e)}")
560 | 
561 | # Export Results
562 | st.markdown('<h2 class="section-header">💾 Export Results</h2>', unsafe_allow_html=True)
563 | 
564 | col1, col2, col3 = st.columns(3)
565 | 
566 | with col1:
567 |     if st.button('📄 Export Metrics Report'):
568 |         report = print_evaluation_report(metrics, model_choice)
569 |         st.download_button(
570 |             label="Download Report",
571 |             data=report,
572 |             file_name=f"{model_choice}_report.txt",
573 |             mime="text/plain"
574 |         )
575 | 
576 | with col2:
577 |     if st.button('📊 Export Predictions CSV'):
578 |         results_df = pd.DataFrame({
579 |             'Date': y_test.index,
580 |             'Actual': y_test.values,
581 |             'Predicted': y_pred,
582 |             'Residual': y_test.values - y_pred
583 |         })
584 |         csv = results_df.to_csv(index=False)
585 |         st.download_button(
586 |             label="Download CSV",
587 |             data=csv,
588 |             file_name=f"{model_choice}_predictions.csv",
589 |             mime="text/csv"
590 |         )
591 | 
592 | with col3:
593 |     if st.button('💾 Save Model'):
594 |         try:
595 |             if model_choice not in ['ARIMA', 'SARIMA', 'Ensemble (Stacking)']:
596 |                 save_model(model, f"{model_choice.lower().replace(' ', '_')}_model.pkl")
597 |                 st.success(f"✅ Model saved as {model_choice.lower().replace(' ', '_')}_model.pkl")
598 |             else:
599 |                 st.warning("Model saving not implemented for this model type.")
600 |         except Exception as e:
601 |             st.error(f"Failed to save model: {str(e)}")
602 | 
603 | # Footer
604 | st.markdown("---")
605 | st.markdown('<p style="text-align: center; color: #6c757d;">🚀 Advanced Time Series Forecasting Pro - Powered by AI & Machine Learning</p>', unsafe_allow_html=True)
606 | 


--------------------------------------------------------------------------------