├── .gitattributes
├── .spyproject
│   └── config
│       ├── backups
│       │   ├── codestyle.ini.bak
│       │   ├── encoding.ini.bak
│       │   ├── vcs.ini.bak
│       │   └── workspace.ini.bak
│       ├── codestyle.ini
│       ├── defaults
│       │   ├── defaults-codestyle-0.2.0.ini
│       │   ├── defaults-encoding-0.2.0.ini
│       │   ├── defaults-vcs-0.2.0.ini
│       │   └── defaults-workspace-0.2.0.ini
│       ├── encoding.ini
│       ├── vcs.ini
│       └── workspace.ini
├── CITATION.cff
├── LICENSE
├── LazyProphet.egg-info
│   ├── PKG-INFO
│   ├── SOURCES.txt
│   ├── dependency_links.txt
│   ├── requires.txt
│   └── top_level.txt
├── LazyProphet
│   ├── FourierBasisFunction.py
│   ├── LazyProphet.py
│   ├── LinearBasisFunction.py
│   ├── Optimizer.py
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── FourierBasisFunction.cpython-37.pyc
│   │   ├── LazyProphet.cpython-37.pyc
│   │   ├── LinearBasisFunction.cpython-37.pyc
│   │   ├── Optimizer.cpython-37.pyc
│   │   └── __init__.cpython-37.pyc
│   └── static
│       ├── example_output.png
│       └── lp_logo.png
├── README.md
├── __init__.py
├── __pycache__
│   └── __init__.cpython-37.pyc
├── build
│   └── lib
│       └── LazyProphet
│           ├── FourierBasisFunction.py
│           ├── LazyProphet.py
│           ├── LinearBasisFunction.py
│           ├── Optimizer.py
│           └── __init__.py
├── dist
│   ├── LazyProphet-0.3.3-py3-none-any.whl
│   ├── LazyProphet-0.3.4-py3-none-any.whl
│   ├── LazyProphet-0.3.5-py3-none-any.whl
│   ├── LazyProphet-0.3.6-py3-none-any.whl
│   ├── LazyProphet-0.3.7-py3-none-any.whl
│   └── LazyProphet-0.3.8-py3-none-any.whl
└── setup.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 
--------------------------------------------------------------------------------
/.spyproject/config/backups/codestyle.ini.bak:
--------------------------------------------------------------------------------
1 | [codestyle]
2 | indentation = True
3 | edge_line = True
4 | edge_line_columns = 79
5 | 
6 | [main]
7 | version = 0.2.0
8 | 
9 | 
--------------------------------------------------------------------------------
/.spyproject/config/backups/encoding.ini.bak:
--------------------------------------------------------------------------------
1 | [encoding]
2 | text_encoding = utf-8
3 | 
4 | [main]
5 | version = 0.2.0
6 | 
7 | 
--------------------------------------------------------------------------------
/.spyproject/config/backups/vcs.ini.bak:
--------------------------------------------------------------------------------
1 | [vcs]
2 | use_version_control = False
3 | version_control_system = 
4 | 
5 | [main]
6 | version = 0.2.0
7 | 
8 | 
--------------------------------------------------------------------------------
/.spyproject/config/backups/workspace.ini.bak:
--------------------------------------------------------------------------------
1 | [workspace]
2 | restore_data_on_startup = True
3 | save_data_on_exit = True
4 | save_history = True
5 | save_non_project_files = False
6 | project_type = 'empty-project-type'
7 | recent_files = ['..\\.spyder-py3\\temp.py', 'LazyProphet\\LazyProphet.py', 'setup.py', 'LazyProphet\\FourierBasisFunction.py', 'LazyProphet\\LinearBasisFunction.py', 'LazyProphet\\Optimizer.py', 'LazyProphet\\Ensemble.py']
8 | 
9 | [main]
10 | version = 0.2.0
11 | recent_files = []
12 | 
13 | 
--------------------------------------------------------------------------------
/.spyproject/config/codestyle.ini:
--------------------------------------------------------------------------------
1 | [codestyle]
2 | indentation = True
3 | edge_line = True
4 | edge_line_columns = 79
5 | 
6 | [main]
7 | version = 0.2.0
8 | 
9 | 
-------------------------------------------------------------------------------- /.spyproject/config/defaults/defaults-codestyle-0.2.0.ini: -------------------------------------------------------------------------------- 1 | [codestyle] 2 | indentation = True 3 | edge_line = True 4 | edge_line_columns = 79 5 | 6 | -------------------------------------------------------------------------------- /.spyproject/config/defaults/defaults-encoding-0.2.0.ini: -------------------------------------------------------------------------------- 1 | [encoding] 2 | text_encoding = utf-8 3 | 4 | -------------------------------------------------------------------------------- /.spyproject/config/defaults/defaults-vcs-0.2.0.ini: -------------------------------------------------------------------------------- 1 | [vcs] 2 | use_version_control = False 3 | version_control_system = 4 | 5 | -------------------------------------------------------------------------------- /.spyproject/config/defaults/defaults-workspace-0.2.0.ini: -------------------------------------------------------------------------------- 1 | [workspace] 2 | restore_data_on_startup = True 3 | save_data_on_exit = True 4 | save_history = True 5 | save_non_project_files = False 6 | 7 | -------------------------------------------------------------------------------- /.spyproject/config/encoding.ini: -------------------------------------------------------------------------------- 1 | [encoding] 2 | text_encoding = utf-8 3 | 4 | [main] 5 | version = 0.2.0 6 | 7 | -------------------------------------------------------------------------------- /.spyproject/config/vcs.ini: -------------------------------------------------------------------------------- 1 | [vcs] 2 | use_version_control = False 3 | version_control_system = 4 | 5 | [main] 6 | version = 0.2.0 7 | 8 | -------------------------------------------------------------------------------- /.spyproject/config/workspace.ini: -------------------------------------------------------------------------------- 1 | [workspace] 2 | restore_data_on_startup = True 3 | save_data_on_exit = True 4 | save_history = True 5 | save_non_project_files = False 6 | project_type = 'empty-project-type' 7 | recent_files = ['..\\.spyder-py3\\temp.py', 'LazyProphet\\LazyProphet.py', 'setup.py', 'LazyProphet\\FourierBasisFunction.py', 'LazyProphet\\LinearBasisFunction.py', 'LazyProphet\\Optimizer.py', 'LazyProphet\\Ensemble.py'] 8 | 9 | [main] 10 | version = 0.2.0 11 | recent_files = [] 12 | 13 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: LazyProphet 6 | message: >- 7 | If you want to cite the work, please use this 8 | information. 
9 | type: software
10 | authors:
11 |   - given-names: Tyler
12 |     family-names: Blume
13 |     email: tyler.blume@mail.usf.edu
14 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 tblume1992
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/LazyProphet.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.1
2 | Name: LazyProphet
3 | Version: 0.3.8
4 | Summary: Time series forecasting with LightGBM
5 | Home-page: https://github.com/tblume1992/LazyProphet
6 | Author: Tyler Blume
7 | Author-email: tblume@mail.USF.edu
8 | License: UNKNOWN
9 | Keywords: forecasting,time series,lightgbm
10 | Platform: UNKNOWN
11 | Classifier: Programming Language :: Python :: 3
12 | Classifier: License :: OSI Approved :: MIT License
13 | Classifier: Operating System :: OS Independent
14 | Description-Content-Type: text/markdown
15 | License-File: LICENSE
16 | 
17 | # LazyProphet v0.3.8
18 | 
19 | ## Recent Changes
20 | 
21 | With v0.3.8 comes a fully-fledged Optuna optimizer for simple (no exogenous) regression problems. Classification is still ToDo.
22 | 
23 | A quick example of the new functionality:
24 | 
25 | ```
26 | from LazyProphet import LazyProphet as lp
27 | from sklearn.datasets import fetch_openml
28 | import matplotlib.pyplot as plt
29 | import numpy as np
30 | bike_sharing = fetch_openml("Bike_Sharing_Demand", version=2, as_frame=True)
31 | y = bike_sharing.frame['count']
32 | y = y[-400:].values
33 | 
34 | lp_model = lp.LazyProphet.Optimize(y,
35 |                                    seasonal_period=[24, 168],
36 |                                    n_folds=2,  # must be greater than 1
37 |                                    n_trials=20,  # number of optimization runs, default is 100
38 |                                    test_size=48  # size of the holdout set to test against
39 |                                    )
40 | fitted = lp_model.fit(y)
41 | predicted = lp_model.predict(100)
42 | 
43 | plt.plot(y)
44 | plt.plot(np.append(fitted, predicted))
45 | plt.axvline(400)
46 | plt.show()
47 | ```
48 | 
49 | ## Introduction
50 | 
51 | [A decent intro can be found here.](https://medium.com/p/3745bafe5ce5)
52 | 
53 | LazyProphet is a time-series forecasting model that uses LightGBM to forecast single time series.
54 | 
55 | Many niceties have been added, such as recursive forecasting when using a lagged target variable (for example, using the last 4 values to predict the 5th).
56 | 
57 | Additionally, Fourier basis functions and penalized weighted piecewise linear basis functions are available as well!
58 | 
59 | Don't ever use the in-sample fit for these types of models, as they fit the data quite snugly.
60 | 
61 | ## Quickstart
62 | 
63 | ```
64 | pip install LazyProphet
65 | ```
66 | 
67 | A simple example using data from Sklearn; just give it the hyperparameters and an array:
68 | 
69 | ```
70 | from LazyProphet import LazyProphet as lp
71 | from sklearn.datasets import fetch_openml
72 | import matplotlib.pyplot as plt
73 | import numpy as np
74 | bike_sharing = fetch_openml("Bike_Sharing_Demand", version=2, as_frame=True)
75 | y = bike_sharing.frame['count']
76 | y = y[-400:].values
77 | 
78 | lp_model = lp.LazyProphet(seasonal_period=[24, 168],  # a list means we use both seasonal periods
79 |                           n_basis=4,  # weighted piecewise basis functions
80 |                           fourier_order=10,
81 |                           ar=list(range(1, 25)),
82 |                           decay=.99  # the 'penalized' in penalized weighted piecewise linear basis functions
83 |                           )
84 | fitted = lp_model.fit(y)
85 | predicted = lp_model.predict(100)
86 | 
87 | plt.plot(y)
88 | plt.plot(np.append(fitted, predicted))
89 | plt.axvline(400)
90 | plt.show()
91 | ```
92 | ![alt text](https://github.com/tblume1992/LazyProphet/blob/main/LazyProphet/static/example_output.png "Output 1")
93 | 
94 | If you are working with less data then you will probably want to pass custom LightGBM params via boosting_params when creating the LazyProphet object.
95 | 
96 | The default params are:
97 | 
98 | ```
99 | boosting_params = {
100 |     "objective": "regression",
101 |     "metric": "rmse",
102 |     "verbosity": -1,
103 |     "boosting_type": "gbdt",
104 |     "seed": 42,
105 |     'linear_tree': False,
106 |     'learning_rate': .15,
107 |     'min_child_samples': 5,
108 |     'num_leaves': 31,
109 |     'num_iterations': 50
110 | }
111 | ```
112 | **WARNING**
113 | Passing linear_tree=True can be extremely unstable, especially with the ar and n_basis arguments. We test for linearity and will de-trend if necessary.
114 | 
115 | 
116 | Most important is controlling the complexity: with less data, dial down num_leaves and/or learning_rate.
117 | 
118 | Alternatively, you could try out the method:
119 | 
120 | ```
121 | tree_optimize(y, exogenous=None, cv_splits=3, test_size=None)
122 | ```
123 | Call it in place of the fit method. It will run 'cv_splits' time-series cross-validation splits to optimize the tree using Optuna. This method showed somewhat degraded performance in testing, but it may be better for auto-forecasting across various data sizes.
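
As a minimal sketch of that workflow, reusing the quickstart setup above (the cv_splits and test_size values here are illustrative, not recommendations):

```
lp_model = lp.LazyProphet(seasonal_period=[24, 168],
                          n_basis=4,
                          fourier_order=10,
                          ar=list(range(1, 25)))
# tree_optimize tunes the underlying LightGBM tree with Optuna's
# LightGBMTunerCV and returns the in-sample fit, just like fit().
fitted = lp_model.tree_optimize(y, cv_splits=3, test_size=48)
predicted = lp_model.predict(100)
```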
124 | 125 | 126 | -------------------------------------------------------------------------------- /LazyProphet.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | .gitattributes 2 | LICENSE 3 | README.md 4 | __init__.py 5 | setup.py 6 | .spyproject/config/codestyle.ini 7 | .spyproject/config/encoding.ini 8 | .spyproject/config/vcs.ini 9 | .spyproject/config/workspace.ini 10 | .spyproject/config/backups/codestyle.ini.bak 11 | .spyproject/config/backups/encoding.ini.bak 12 | .spyproject/config/backups/vcs.ini.bak 13 | .spyproject/config/backups/workspace.ini.bak 14 | .spyproject/config/defaults/defaults-codestyle-0.2.0.ini 15 | .spyproject/config/defaults/defaults-encoding-0.2.0.ini 16 | .spyproject/config/defaults/defaults-vcs-0.2.0.ini 17 | .spyproject/config/defaults/defaults-workspace-0.2.0.ini 18 | LazyProphet/FourierBasisFunction.py 19 | LazyProphet/LazyProphet.py 20 | LazyProphet/LinearBasisFunction.py 21 | LazyProphet/Optimizer.py 22 | LazyProphet/__init__.py 23 | LazyProphet.egg-info/PKG-INFO 24 | LazyProphet.egg-info/SOURCES.txt 25 | LazyProphet.egg-info/dependency_links.txt 26 | LazyProphet.egg-info/requires.txt 27 | LazyProphet.egg-info/top_level.txt 28 | LazyProphet/__pycache__/FourierBasisFunction.cpython-37.pyc 29 | LazyProphet/__pycache__/LazyProphet.cpython-37.pyc 30 | LazyProphet/__pycache__/LinearBasisFunction.cpython-37.pyc 31 | LazyProphet/__pycache__/__init__.cpython-37.pyc 32 | LazyProphet/static/example_output.png 33 | __pycache__/__init__.cpython-37.pyc 34 | dist/LazyProphet-0.3.3-py3-none-any.whl 35 | dist/LazyProphet-0.3.4-py3-none-any.whl 36 | dist/LazyProphet-0.3.5-py3-none-any.whl 37 | dist/LazyProphet-0.3.6-py3-none-any.whl -------------------------------------------------------------------------------- /LazyProphet.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /LazyProphet.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | statsmodels 4 | scikit-learn 5 | optuna 6 | scipy 7 | matplotlib 8 | lightgbm 9 | -------------------------------------------------------------------------------- /LazyProphet.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | LazyProphet 2 | -------------------------------------------------------------------------------- /LazyProphet/FourierBasisFunction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | 4 | 5 | class FourierBasisFunction: 6 | 7 | def __init__(self, fourier_order, seasonal_weights=None): 8 | self.fourier_order = fourier_order 9 | self.seasonal_weights = seasonal_weights 10 | if self.seasonal_weights is not None: 11 | self.seasonal_weights = np.array(self.seasonal_weights).reshape((-1, 1)) 12 | 13 | def get_fourier_series(self, y, seasonal_period): 14 | x = 2 * np.pi * np.arange(1, self.fourier_order + 1) / seasonal_period 15 | t = np.arange(1, len(y) + 1) 16 | x = x * t[:, None] 17 | fourier_series = np.concatenate((np.cos(x), np.sin(x)), axis=1) 18 | return fourier_series 19 | 20 | def get_harmonics(self, y, seasonal_period): 21 | harmonics = self.get_fourier_series(y, seasonal_period) 22 | if self.seasonal_weights is not None: 23 | harmonics = harmonics * self.seasonal_weights 24 | return 
harmonics 25 | 26 | def get_future_harmonics(self, harmonics, forecast_horizon, seasonal_period): 27 | total_length = len(harmonics) + forecast_horizon 28 | future_harmonics = self.get_fourier_series(np.arange(total_length), seasonal_period) 29 | if self.seasonal_weights is None: 30 | return future_harmonics[len(harmonics):, :] 31 | else: 32 | return future_harmonics[len(harmonics):, :] * self.seasonal_weights[-1] 33 | 34 | -------------------------------------------------------------------------------- /LazyProphet/LazyProphet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Feb 12 08:19:32 2022 4 | 5 | @author: Tyler Blume 6 | """ 7 | import numpy as np 8 | import pandas as pd 9 | import matplotlib.pyplot as plt 10 | from sklearn.preprocessing import StandardScaler 11 | from sklearn.model_selection import TimeSeriesSplit 12 | import optuna.integration.lightgbm as lgb 13 | import optuna 14 | from scipy import stats 15 | import lightgbm as gbm 16 | import warnings 17 | from LazyProphet.Optimizer import Optimize 18 | from LazyProphet.LinearBasisFunction import LinearBasisFunction 19 | from LazyProphet.FourierBasisFunction import FourierBasisFunction 20 | warnings.filterwarnings("ignore") 21 | 22 | 23 | class LazyProphet: 24 | 25 | def __init__(self, 26 | objective='regression', 27 | seasonal_period=None, 28 | fourier_order=10, 29 | n_basis=10, 30 | ar=None, 31 | ma_windows=None, 32 | decay=None, 33 | scale=True, 34 | weighted=True, 35 | decay_average=False, 36 | seasonality_weights=None, 37 | linear_trend='auto', 38 | boosting_params=None, 39 | series_features=None, 40 | return_proba=False, 41 | trend_penalty=True, 42 | n_estimators=50, 43 | num_leaves=31, 44 | learning_rate=.1, 45 | colsample_bytree=1): 46 | self.objective = objective 47 | self.trend_penalty = trend_penalty 48 | self.exogenous = None 49 | if seasonal_period is not None: 50 | if not isinstance(seasonal_period, list): 51 | seasonal_period = [seasonal_period] 52 | self.seasonal_period = seasonal_period 53 | # if num_leaves < 2: 54 | # num_leaves = 2 55 | if objective == 'classification': 56 | scale = False 57 | linear_trend = False 58 | self.scale = scale 59 | if ar is not None: 60 | if not isinstance(ar, list): 61 | ar = [ar] 62 | self.ar = ar 63 | if ma_windows is not None: 64 | if not isinstance(ma_windows, list): 65 | ma_windows = [ma_windows] 66 | self.ma_windows = ma_windows 67 | self.fourier_order = int(fourier_order) 68 | self.decay = decay 69 | if n_basis: 70 | self.n_basis = int(n_basis) 71 | else: 72 | self.n_basis = None 73 | self.weighted = weighted 74 | self.series_features = series_features 75 | self.component_dict = {} 76 | self.decay_average = decay_average 77 | self.seasonality_weights = seasonality_weights 78 | self.linear_trend = linear_trend 79 | self.return_proba = return_proba 80 | if self.objective == 'regression': 81 | metric = 'rmse' 82 | objective = 'regression' 83 | elif self.objective == 'classification': 84 | metric = 'cross-entropy' 85 | objective = 'binary' 86 | else: 87 | metric = 'None' 88 | if boosting_params is None: 89 | self.boosting_params = { 90 | "objective": objective, 91 | "metric": metric, 92 | "verbosity": -1, 93 | "boosting_type": "gbdt", 94 | "seed": 42, 95 | 'linear_tree': False, 96 | 'learning_rate': learning_rate, 97 | 'colsample_bytree': colsample_bytree, 98 | 'min_child_samples': 5, 99 | 'num_leaves': num_leaves, 100 | 'num_iterations': n_estimators 101 | } 102 | else: 103 | 
self.boosting_params = boosting_params 104 | 105 | def linear_test(self, y): 106 | y = y.copy().reshape((-1,)) 107 | xi = np.arange(1, len(y) + 1) 108 | xi = xi**2 109 | slope, intercept, r_value, p_value, std_err = stats.linregress(xi,y) 110 | trend_line = slope*xi*r_value + intercept 111 | if self.linear_trend is None or self.linear_trend == 'auto': 112 | n_bins = (1 + len(y)**(1/3) * 2) 113 | # n_bins = int(len(y) / 13) 114 | splitted_array = np.array_split(y.reshape(-1,), int(n_bins)) 115 | mean_splits = np.array([np.mean(i) for i in splitted_array]) 116 | grad = np.gradient(mean_splits) 117 | threshold = .9 * n_bins 118 | if sum(grad < 0) >= threshold or sum(grad > 0) >= threshold: 119 | growth = True 120 | # print('True') 121 | # asc_array = np.sort(mean_splits) 122 | # desc_array = np.flip(asc_array) 123 | # if all(asc_array == mean_splits): 124 | # growth = True 125 | # elif all(desc_array == mean_splits): 126 | # growth = True 127 | else: 128 | growth = False 129 | if (growth): 130 | self.linear_trend = True 131 | else: 132 | self.linear_trend = False 133 | self.slope = slope * r_value 134 | self.penalty = r_value 135 | self.intercept = intercept 136 | return trend_line 137 | 138 | def get_piecewise(self, y): 139 | self.lbf = LinearBasisFunction(n_changepoints=int(self.n_basis), 140 | decay=self.decay, 141 | weighted=self.weighted) 142 | basis = self.lbf.get_basis(y) 143 | return basis 144 | 145 | def get_harmonics(self, y, seasonal_period): 146 | self.fbf = FourierBasisFunction(int(self.fourier_order), 147 | self.seasonality_weights) 148 | basis = self.fbf.get_harmonics(y, seasonal_period) 149 | return basis 150 | 151 | @staticmethod 152 | def shift(xs, n): 153 | e = np.empty_like(xs) 154 | if n >= 0: 155 | e[:n] = np.nan 156 | e[n:] = xs[:-n] 157 | else: 158 | e[n:] = np.nan 159 | e[:n] = xs[-n:] 160 | return e 161 | 162 | @staticmethod 163 | def moving_average(y, window): 164 | y = pd.Series(y.reshape(-1,)) 165 | ma = np.array(y.rolling(window).mean()) 166 | return ma.reshape((-1, 1)) 167 | 168 | def build_input(self, y, exogenous=None): 169 | X = np.arange(len(y)) 170 | X = X.reshape((-1, 1)) 171 | if self.n_basis is not None: 172 | if len(y) <= self.n_basis - 1: 173 | self.n_basis = int(len(y) - 1) 174 | self.basis = self.get_piecewise(y) 175 | X = np.append(X, self.basis, axis=1) 176 | self.component_dict['basis'] = self.basis 177 | if self.seasonal_period: 178 | for period in self.seasonal_period: 179 | harmonics = self.get_harmonics(y, period) 180 | self.component_dict['harmonics ' + str(period)] = harmonics 181 | X = np.append(X, harmonics, axis=1) 182 | if self.exogenous is not None: 183 | X = np.append(X, exogenous, axis=1) 184 | if self.ar is not None: 185 | for ar_order in self.ar: 186 | shifted_y = self.scaled_y.copy() 187 | shifted_y = LazyProphet.shift(shifted_y, ar_order) 188 | X = np.append(X, shifted_y.reshape(-1, 1), axis=1) 189 | if self.ma_windows is not None: 190 | for ma_order in self.ma_windows: 191 | ma = LazyProphet.moving_average(self.scaled_y, ma_order) 192 | X = np.append(X, ma, axis=1) 193 | return X 194 | 195 | def scale_input(self, y): 196 | self.scaler = StandardScaler() 197 | self.scaler.fit(np.asarray(y).reshape(-1, 1)) 198 | self.scaled_y = y.copy() 199 | self.scaled_y = self.scaler.transform(self.scaled_y.reshape(-1, 1)) 200 | 201 | def fit(self, y, X=None): 202 | self.exogenous = X 203 | self.og_y = y 204 | if self.series_features is None: 205 | y = np.array(y) 206 | else: 207 | y = self.series_features 208 | if self.linear_trend is None or 
self.linear_trend: 209 | fitted_trend = self.linear_test(y) 210 | if self.linear_trend: 211 | y = np.subtract(y, fitted_trend) 212 | #TODO: Should we disable here? 213 | # if self.linear_trend: 214 | # self.ar = None 215 | # self.decay = None 216 | if self.scale: 217 | self.scale_input(y) 218 | else: 219 | self.scaled_y = y.copy() 220 | self.X = self.build_input(self.scaled_y, X) 221 | if self.objective == 'regression': 222 | self.model_obj = gbm.LGBMRegressor(**self.boosting_params) 223 | if self.objective == 'classification': 224 | self.model_obj = gbm.LGBMClassifier(**self.boosting_params) 225 | if self.series_features is None: 226 | self.model_obj.fit(self.X, self.scaled_y.reshape(-1, )) 227 | else: 228 | self.model_obj.fit(self.X, self.og_y.reshape(-1, )) 229 | #commented out from basic feature selection 230 | # self.columns = pd.Series(lp_model.model_obj.feature_importances_).sort_values().index[-100:] 231 | # self.model_obj.fit(self.X[:, self.columns], self.scaled_y.reshape(-1, )) 232 | if self.return_proba: 233 | fitted = self.model_obj.predict_proba(self.X) 234 | fitted = fitted[:, 1].reshape(-1,1) 235 | else: 236 | fitted = self.model_obj.predict(self.X).reshape(-1,1) 237 | if self.scale: 238 | fitted = self.scaler.inverse_transform(fitted) 239 | if self.linear_trend: 240 | fitted = np.add(fitted.reshape(-1,1), fitted_trend.reshape(-1,1)) 241 | return fitted 242 | 243 | def recursive_predict(self, X, forecast_horizon): 244 | self.future_X = X 245 | #TODO: This is just...horrible 246 | predictions = [] 247 | self.full = self.scaled_y.copy() 248 | if self.ar is not None: 249 | self.future_X = np.append(self.future_X, 250 | np.zeros((len(X), len(self.ar))), 251 | axis=1) 252 | if self.ma_windows is not None: 253 | self.future_X = np.append(self.future_X, 254 | np.zeros((len(X), len(self.ma_windows))), 255 | axis=1) 256 | for step in range(forecast_horizon): 257 | if self.ar is not None: 258 | for i, ar_order in enumerate(self.ar): 259 | column_slice = -len(self.ar) + i 260 | if step < ar_order: 261 | self.future_X[step, column_slice] = self.scaled_y[-ar_order + step] 262 | else: 263 | self.future_X[step, column_slice] = predictions[-ar_order] 264 | if self.ma_windows is not None: 265 | for i, ma_window in enumerate(self.ma_windows): 266 | column_slice = -len(self.ma_windows) + i 267 | ma = np.mean(self.full[-ma_window:]) 268 | self.future_X[step, column_slice] = ma 269 | recursive_X = self.future_X[step, :].reshape(1, -1) 270 | if self.return_proba: 271 | predicted = self.model_obj.predict_proba(recursive_X) 272 | predicted = predicted[:, 1] 273 | else: 274 | predicted = self.model_obj.predict(recursive_X) 275 | predictions.append(predicted) 276 | self.full = np.append(self.full, predictions[-1]) 277 | return np.array(predictions) 278 | 279 | def predict(self, forecast_horizon, future_X=None): 280 | X = np.arange(forecast_horizon) + len(self.scaled_y) 281 | X = X.reshape((-1, 1)) 282 | if self.n_basis is not None: 283 | basis = self.lbf.get_future_basis(self.component_dict['basis'], 284 | forecast_horizon, 285 | average=self.decay_average) 286 | X = np.append(X, basis, axis=1) 287 | if self.seasonal_period: 288 | for period in self.seasonal_period: 289 | harmonics = self.component_dict['harmonics ' + str(period)] 290 | future_harmonics = self.fbf.get_future_harmonics(harmonics, 291 | forecast_horizon, 292 | period) 293 | X = np.append(X, future_harmonics, axis=1) 294 | if self.exogenous is not None: 295 | X = np.append(X, future_X, axis=1) 296 | if self.ar is not None or 
self.ma_windows is not None: 297 | predicted = self.recursive_predict(X, forecast_horizon) 298 | else: 299 | if self.return_proba: 300 | predicted = self.model_obj.predict_proba(X) 301 | predicted = predicted[:, 1] 302 | else: 303 | predicted = self.model_obj.predict(X) 304 | predicted = predicted.reshape(-1,1) 305 | if self.scale == True: 306 | predicted = self.scaler.inverse_transform(predicted) 307 | if self.linear_trend: 308 | linear_trend = [i for i in range(0, forecast_horizon)] 309 | linear_trend = np.reshape(linear_trend, (len(linear_trend), 1)) 310 | linear_trend += len(self.scaled_y) + 1 311 | linear_trend = linear_trend**2 312 | if self.trend_penalty: 313 | slope = self.slope*self.penalty 314 | else: 315 | slope = self.slope 316 | linear_trend = np.multiply(linear_trend, slope) + self.intercept 317 | predicted = np.add(predicted, linear_trend.reshape(-1,1)) 318 | return predicted 319 | 320 | @classmethod 321 | def Optimize(cls, y, seasonal_period, n_folds, n_trials=100, test_size=None): 322 | optimizer = Optimize(y, 323 | LazyProphet, 324 | seasonal_period=seasonal_period, 325 | n_trials=n_trials, 326 | n_folds=n_folds, 327 | test_size=test_size) 328 | cls.study = optimizer.fit() 329 | optimized = cls.study.best_params 330 | optimized['ar'] = list(range(1, int(optimized['ar']) + 1)) 331 | optimized['n_estimators'] = int(optimized['n_estimators']) 332 | optimized['num_leaves'] = int(optimized['num_leaves']) 333 | optimized['n_basis'] = int(optimized['n_basis']) 334 | optimized['seasonal_period'] = optimized['seasonal_period'] 335 | # optimized['fourier_order'] = int(optimized['fourier_order']) 336 | return cls(**optimized) 337 | 338 | def init_opt_params(self): 339 | if self.objective == 'regression': 340 | metric = 'rmse' 341 | elif self.objective == 'classification': 342 | metric = 'cross-entropy' 343 | self.opt_params = { 344 | "objective": self.objective, 345 | "metric": metric, 346 | "verbosity": -1, 347 | "boosting_type": "gbdt", 348 | "seed": 42, 349 | 'linear_tree': False, 350 | } 351 | 352 | def tree_optimize(self, y, exogenous=None, cv_splits=3, test_size=None): 353 | self.init_opt_params() 354 | if self.n_basis is not None: 355 | if len(y) <= self.n_basis - 1: 356 | self.n_basis = int(len(y) - 1) 357 | self.exogenous = exogenous 358 | y = np.array(y) 359 | self.og_y = y 360 | if self.linear_trend: 361 | fitted_trend = self.linear_test(y) 362 | y = np.subtract(y, fitted_trend) 363 | # if self.linear_trend: 364 | # self.ar = None 365 | # self.decay = None 366 | if self.scale: 367 | self.scale_input(y) 368 | else: 369 | self.scaled_y = y.copy() 370 | self.X = self.build_input(self.scaled_y) 371 | study_tuner = optuna.create_study(direction='minimize') 372 | dtrain = lgb.Dataset(self.X, label=self.scaled_y) 373 | optuna.logging.set_verbosity(optuna.logging.CRITICAL) 374 | tscv = TimeSeriesSplit(n_splits=cv_splits, test_size=test_size) 375 | tuner = lgb.LightGBMTunerCV(self.opt_params, 376 | dtrain, 377 | study=study_tuner, 378 | verbose_eval=False, 379 | early_stopping_rounds=10, 380 | seed = 42, 381 | folds=tscv, 382 | num_boost_round=500, 383 | show_progress_bar=False 384 | ) 385 | 386 | tuner.run() 387 | best_params = tuner.best_params 388 | self.model_obj = gbm.LGBMRegressor(**best_params) 389 | self.model_obj.fit(self.X, self.scaled_y) 390 | fitted = self.model_obj.predict(self.X).reshape(-1,1) 391 | if self.scale: 392 | fitted = self.scaler.inverse_transform(fitted) 393 | if self.linear_trend: 394 | fitted = np.add(fitted.reshape(-1,1), fitted_trend.reshape(-1,1)) 
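        # at this point any scaling and any detected linear trend have been
        # undone, so the returned fit is on the original scale of y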
395 | return fitted 396 | 397 | 398 | 399 | 400 | 401 | -------------------------------------------------------------------------------- /LazyProphet/LinearBasisFunction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pandas as pd 3 | import numpy as np 4 | 5 | 6 | class LinearBasisFunction: 7 | 8 | def __init__(self, n_changepoints, decay=None, weighted=True): 9 | self.n_changepoints = n_changepoints 10 | self.decay = decay 11 | self.weighted = weighted 12 | 13 | def get_basis(self, y): 14 | y = y.copy() 15 | y -= y[0] 16 | mean_y = np.mean(y) 17 | n_changepoints = self.n_changepoints 18 | array_splits = np.array_split(np.array(y),n_changepoints + 1)[:-1] 19 | if self.weighted: 20 | initial_point = y[0] 21 | final_point = y[-1] 22 | else: 23 | initial_point = 0 24 | final_point = 0 25 | changepoints = np.zeros(shape=(len(y), n_changepoints)) 26 | len_splits = 0 27 | for i in range(n_changepoints): 28 | len_splits += len(array_splits[i]) 29 | if self.weighted: 30 | moving_point = array_splits[i][-1] 31 | else: 32 | moving_point = 1 33 | left_basis = np.linspace(initial_point, 34 | moving_point, 35 | len_splits) 36 | end_point = self.add_decay(moving_point, final_point, mean_y) 37 | right_basis = np.linspace(moving_point, 38 | end_point, 39 | len(y) - len_splits + 1) 40 | changepoints[:, i] = np.append(left_basis, right_basis[1:]) 41 | return changepoints 42 | 43 | def add_decay(self, moving_point, final_point, mean_point): 44 | if self.decay is None: 45 | return final_point 46 | else: 47 | if self.decay == 'auto': 48 | dd = max(.001, min(.99, moving_point**2 / (mean_point**2))) 49 | return moving_point - ((moving_point - final_point) * (1 - dd)) 50 | else: 51 | return moving_point - ((moving_point - final_point) * (1 - self.decay)) 52 | 53 | def get_future_basis(self, basis_functions, forecast_horizon, average=False): 54 | n_components = np.shape(basis_functions)[1] 55 | slopes = np.gradient(basis_functions, axis=0)[-1, :] 56 | future_basis = np.array(np.arange(0, forecast_horizon + 1)) 57 | future_basis += len(basis_functions) 58 | future_basis = np.transpose([future_basis] * n_components) 59 | future_basis = future_basis * slopes 60 | future_basis = future_basis + (basis_functions[-1, :] - future_basis[0, :]) 61 | if average: 62 | future_basis = np.transpose([np.mean(future_basis, axis=1)] * n_components) 63 | return future_basis[1:, :] 64 | 65 | 66 | -------------------------------------------------------------------------------- /LazyProphet/Optimizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from sklearn.model_selection import TimeSeriesSplit 3 | from sklearn.metrics import mean_squared_error 4 | import time 5 | import numpy as np 6 | import optuna 7 | optuna.logging.set_verbosity(optuna.logging.WARNING) 8 | 9 | 10 | class Optimize: 11 | def __init__(self, 12 | y, 13 | lazyprophet_class, 14 | seasonal_period=0, 15 | n_folds=3, 16 | test_size=None, 17 | n_trials=100): 18 | self.y = y 19 | self.lazyprophet_class = lazyprophet_class 20 | if isinstance(seasonal_period, list): 21 | self.max_pulse = max(seasonal_period) 22 | else: 23 | self.max_pulse = seasonal_period 24 | self.seasonal_period = seasonal_period 25 | self.n_folds = n_folds 26 | self.test_size = test_size 27 | self.n_trials = n_trials 28 | 29 | def logic_layer(self): 30 | n_samples = len(self.y) 31 | test_size = n_samples//(self.n_folds + 1) 32 | if n_samples - test_size < 
self.max_pulse: 33 | self.seasonal_period = 0 34 | 35 | def scorer(self, model_obj, y, metric, cv): 36 | cv_splits = cv.split(y) 37 | mses = [] 38 | for train_index, test_index in cv_splits: 39 | try: 40 | model_obj.fit(y[train_index]) 41 | predicted = model_obj.predict(len(y[test_index])) 42 | mses.append(mean_squared_error(y[test_index], predicted)) 43 | except: 44 | mses.append(np.inf) 45 | return np.mean(mses) 46 | 47 | 48 | def objective(self, trial): 49 | params = { 50 | "n_estimators": trial.suggest_int(name="n_estimators", low=25, high=500), 51 | "num_leaves": trial.suggest_int("num_leaves", 8, 128), 52 | "n_basis": trial.suggest_int("n_basis", 0, 15), 53 | "decay": trial.suggest_categorical("decay", ['auto', 54 | .05, 55 | .1, 56 | .25, 57 | .5, 58 | .75, 59 | .9, 60 | .99]), 61 | } 62 | if self.seasonal_period: 63 | params.update({'seasonal_period': trial.suggest_categorical("seasonal_period", [None, self.seasonal_period])}) 64 | params.update({'ar': trial.suggest_int(name="ar", low=0, high=self.max_pulse)}) 65 | else: 66 | params.update({'ar': trial.suggest_int(name="ar", low=0, high=4)}) 67 | params['ar'] = list(range(1, 1 + params['ar'])) 68 | clf = self.lazyprophet_class(**params) 69 | score = self.scorer(clf, self.y, mean_squared_error, TimeSeriesSplit(self.n_folds, test_size=self.test_size)) 70 | return score 71 | 72 | def fit(self): 73 | study = optuna.create_study(direction="minimize") 74 | study.optimize(self.objective, n_trials=self.n_trials) 75 | return study 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /LazyProphet/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | -------------------------------------------------------------------------------- /LazyProphet/__pycache__/FourierBasisFunction.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/LazyProphet/__pycache__/FourierBasisFunction.cpython-37.pyc -------------------------------------------------------------------------------- /LazyProphet/__pycache__/LazyProphet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/LazyProphet/__pycache__/LazyProphet.cpython-37.pyc -------------------------------------------------------------------------------- /LazyProphet/__pycache__/LinearBasisFunction.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/LazyProphet/__pycache__/LinearBasisFunction.cpython-37.pyc -------------------------------------------------------------------------------- /LazyProphet/__pycache__/Optimizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/LazyProphet/__pycache__/Optimizer.cpython-37.pyc -------------------------------------------------------------------------------- /LazyProphet/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/LazyProphet/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/LazyProphet/static/example_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/LazyProphet/static/example_output.png
--------------------------------------------------------------------------------
/LazyProphet/static/lp_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/LazyProphet/static/lp_logo.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LazyProphet v0.3.8
2 | 
3 | ![alt text](https://github.com/tblume1992/LazyProphet/blob/main/LazyProphet/static/lp_logo.png "logo")
4 | 
5 | ## Recent Changes
6 | 
7 | With v0.3.8 comes a fully-fledged Optuna optimizer for simple (no exogenous) regression problems. Classification is still ToDo.
8 | 
9 | A quick example of the new functionality:
10 | 
11 | ```
12 | from LazyProphet import LazyProphet as lp
13 | from sklearn.datasets import fetch_openml
14 | import matplotlib.pyplot as plt
15 | import numpy as np
16 | bike_sharing = fetch_openml("Bike_Sharing_Demand", version=2, as_frame=True)
17 | y = bike_sharing.frame['count']
18 | y = y[-400:].values
19 | 
20 | lp_model = lp.LazyProphet.Optimize(y,
21 |                                    seasonal_period=[24, 168],
22 |                                    n_folds=2,  # must be greater than 1
23 |                                    n_trials=20,  # number of optimization runs, default is 100
24 |                                    test_size=48  # size of the holdout set to test against
25 |                                    )
26 | fitted = lp_model.fit(y)
27 | predicted = lp_model.predict(100)
28 | 
29 | plt.plot(y)
30 | plt.plot(np.append(fitted, predicted))
31 | plt.axvline(400)
32 | plt.show()
33 | ```
34 | 
35 | ## Introduction
36 | 
37 | [A decent intro can be found here.](https://medium.com/p/3745bafe5ce5)
38 | 
39 | LazyProphet is a time-series forecasting model that uses LightGBM to forecast single time series.
40 | 
41 | Many niceties have been added, such as recursive forecasting when using a lagged target variable (for example, using the last 4 values to predict the 5th).
42 | 
43 | Additionally, Fourier basis functions and penalized weighted piecewise linear basis functions are available as well!
44 | 
45 | Don't ever use the in-sample fit for these types of models, as they fit the data quite snugly.
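
For the recursive forecasting mentioned above, here is a minimal sketch (assuming y is an array like in the examples here; the choice of 4 lags is purely illustrative):

```
from LazyProphet import LazyProphet as lp

# The last 4 values enter as lagged features; predict() then feeds its
# own forecasts back in recursively, one step at a time.
lp_model = lp.LazyProphet(seasonal_period=None,
                          n_basis=4,
                          ar=[1, 2, 3, 4])
fitted = lp_model.fit(y)
predicted = lp_model.predict(10)
```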
46 | 
47 | ## Quickstart
48 | 
49 | ```
50 | pip install LazyProphet
51 | ```
52 | 
53 | A simple example using data from Sklearn; just give it the hyperparameters and an array:
54 | 
55 | ```
56 | from LazyProphet import LazyProphet as lp
57 | from sklearn.datasets import fetch_openml
58 | import matplotlib.pyplot as plt
59 | import numpy as np
60 | bike_sharing = fetch_openml("Bike_Sharing_Demand", version=2, as_frame=True)
61 | y = bike_sharing.frame['count']
62 | y = y[-400:].values
63 | 
64 | lp_model = lp.LazyProphet(seasonal_period=[24, 168],  # a list means we use both seasonal periods
65 |                           n_basis=4,  # weighted piecewise basis functions
66 |                           fourier_order=10,
67 |                           ar=list(range(1, 25)),
68 |                           decay=.99  # the 'penalized' in penalized weighted piecewise linear basis functions
69 |                           )
70 | fitted = lp_model.fit(y)
71 | predicted = lp_model.predict(100)
72 | 
73 | plt.plot(y)
74 | plt.plot(np.append(fitted, predicted))
75 | plt.axvline(400)
76 | plt.show()
77 | ```
78 | ![alt text](https://github.com/tblume1992/LazyProphet/blob/main/LazyProphet/static/example_output.png "Output 1")
79 | 
80 | If you are working with less data then you will probably want to pass custom LightGBM params via boosting_params when creating the LazyProphet object.
81 | 
82 | The default params are:
83 | 
84 | ```
85 | boosting_params = {
86 |     "objective": "regression",
87 |     "metric": "rmse",
88 |     "verbosity": -1,
89 |     "boosting_type": "gbdt",
90 |     "seed": 42,
91 |     'linear_tree': False,
92 |     'learning_rate': .15,
93 |     'min_child_samples': 5,
94 |     'num_leaves': 31,
95 |     'num_iterations': 50
96 | }
97 | ```
98 | **WARNING**
99 | Passing linear_tree=True can be extremely unstable, especially with the ar and n_basis arguments. We test for linearity and will de-trend if necessary.
100 | 
101 | 
102 | Most important is controlling the complexity: with less data, dial down num_leaves and/or learning_rate.
103 | 
104 | Alternatively, you could try out the method:
105 | 
106 | ```
107 | tree_optimize(y, exogenous=None, cv_splits=3, test_size=None)
108 | ```
109 | Call it in place of the fit method. It will run 'cv_splits' time-series cross-validation splits to optimize the tree using Optuna. This method showed somewhat degraded performance in testing, but it may be better for auto-forecasting across various data sizes.
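
For instance, a minimal sketch of that workflow, reusing the quickstart setup above (the cv_splits and test_size values here are illustrative, not recommendations):

```
lp_model = lp.LazyProphet(seasonal_period=[24, 168],
                          n_basis=4,
                          fourier_order=10,
                          ar=list(range(1, 25)))
# tree_optimize tunes the underlying LightGBM tree with Optuna's
# LightGBMTunerCV and returns the in-sample fit, just like fit().
fitted = lp_model.tree_optimize(y, cv_splits=3, test_size=48)
predicted = lp_model.predict(100)
```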
110 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | -------------------------------------------------------------------------------- /__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /build/lib/LazyProphet/FourierBasisFunction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | 4 | 5 | class FourierBasisFunction: 6 | 7 | def __init__(self, fourier_order, seasonal_weights=None): 8 | self.fourier_order = fourier_order 9 | self.seasonal_weights = seasonal_weights 10 | if self.seasonal_weights is not None: 11 | self.seasonal_weights = np.array(self.seasonal_weights).reshape((-1, 1)) 12 | 13 | def get_fourier_series(self, y, seasonal_period): 14 | x = 2 * np.pi * np.arange(1, self.fourier_order + 1) / seasonal_period 15 | t = np.arange(1, len(y) + 1) 16 | x = x * t[:, None] 17 | fourier_series = np.concatenate((np.cos(x), np.sin(x)), axis=1) 18 | return fourier_series 19 | 20 | def get_harmonics(self, y, seasonal_period): 21 | harmonics = self.get_fourier_series(y, seasonal_period) 22 | if self.seasonal_weights is not None: 23 | harmonics = harmonics * self.seasonal_weights 24 | return harmonics 25 | 26 | def get_future_harmonics(self, harmonics, forecast_horizon, seasonal_period): 27 | total_length = len(harmonics) + forecast_horizon 28 | future_harmonics = self.get_fourier_series(np.arange(total_length), seasonal_period) 29 | if self.seasonal_weights is None: 30 | return future_harmonics[len(harmonics):, :] 31 | else: 32 | return future_harmonics[len(harmonics):, :] * self.seasonal_weights[-1] 33 | 34 | -------------------------------------------------------------------------------- /build/lib/LazyProphet/LazyProphet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Feb 12 08:19:32 2022 4 | 5 | @author: Tyler Blume 6 | """ 7 | import numpy as np 8 | import pandas as pd 9 | import matplotlib.pyplot as plt 10 | from sklearn.preprocessing import StandardScaler 11 | from sklearn.model_selection import TimeSeriesSplit 12 | import optuna.integration.lightgbm as lgb 13 | import optuna 14 | from scipy import stats 15 | import lightgbm as gbm 16 | import warnings 17 | from LazyProphet.Optimizer import Optimize 18 | from LazyProphet.LinearBasisFunction import LinearBasisFunction 19 | from LazyProphet.FourierBasisFunction import FourierBasisFunction 20 | warnings.filterwarnings("ignore") 21 | 22 | 23 | class LazyProphet: 24 | 25 | def __init__(self, 26 | objective='regression', 27 | seasonal_period=None, 28 | fourier_order=10, 29 | n_basis=10, 30 | ar=None, 31 | ma_windows=None, 32 | decay=None, 33 | scale=True, 34 | weighted=True, 35 | decay_average=False, 36 | seasonality_weights=None, 37 | linear_trend='auto', 38 | boosting_params=None, 39 | series_features=None, 40 | return_proba=False, 41 | trend_penalty=True, 42 | n_estimators=50, 43 | num_leaves=31, 44 | learning_rate=.1, 45 | colsample_bytree=1): 46 | self.objective = objective 47 | self.trend_penalty = trend_penalty 48 | 
self.exogenous = None 49 | if seasonal_period is not None: 50 | if not isinstance(seasonal_period, list): 51 | seasonal_period = [seasonal_period] 52 | self.seasonal_period = seasonal_period 53 | # if num_leaves < 2: 54 | # num_leaves = 2 55 | if objective == 'classification': 56 | scale = False 57 | linear_trend = False 58 | self.scale = scale 59 | if ar is not None: 60 | if not isinstance(ar, list): 61 | ar = [ar] 62 | self.ar = ar 63 | if ma_windows is not None: 64 | if not isinstance(ma_windows, list): 65 | ma_windows = [ma_windows] 66 | self.ma_windows = ma_windows 67 | self.fourier_order = int(fourier_order) 68 | self.decay = decay 69 | if n_basis: 70 | self.n_basis = int(n_basis) 71 | else: 72 | self.n_basis = None 73 | self.weighted = weighted 74 | self.series_features = series_features 75 | self.component_dict = {} 76 | self.decay_average = decay_average 77 | self.seasonality_weights = seasonality_weights 78 | self.linear_trend = linear_trend 79 | self.return_proba = return_proba 80 | if self.objective == 'regression': 81 | metric = 'rmse' 82 | objective = 'regression' 83 | elif self.objective == 'classification': 84 | metric = 'cross-entropy' 85 | objective = 'binary' 86 | else: 87 | metric = 'None' 88 | if boosting_params is None: 89 | self.boosting_params = { 90 | "objective": objective, 91 | "metric": metric, 92 | "verbosity": -1, 93 | "boosting_type": "gbdt", 94 | "seed": 42, 95 | 'linear_tree': False, 96 | 'learning_rate': learning_rate, 97 | 'colsample_bytree': colsample_bytree, 98 | 'min_child_samples': 5, 99 | 'num_leaves': num_leaves, 100 | 'num_iterations': n_estimators 101 | } 102 | else: 103 | self.boosting_params = boosting_params 104 | 105 | def linear_test(self, y): 106 | y = y.copy().reshape((-1,)) 107 | xi = np.arange(1, len(y) + 1) 108 | xi = xi**2 109 | slope, intercept, r_value, p_value, std_err = stats.linregress(xi,y) 110 | trend_line = slope*xi*r_value + intercept 111 | if self.linear_trend is None or self.linear_trend == 'auto': 112 | n_bins = (1 + len(y)**(1/3) * 2) 113 | # n_bins = int(len(y) / 13) 114 | splitted_array = np.array_split(y.reshape(-1,), int(n_bins)) 115 | mean_splits = np.array([np.mean(i) for i in splitted_array]) 116 | grad = np.gradient(mean_splits) 117 | threshold = .9 * n_bins 118 | if sum(grad < 0) >= threshold or sum(grad > 0) >= threshold: 119 | growth = True 120 | # print('True') 121 | # asc_array = np.sort(mean_splits) 122 | # desc_array = np.flip(asc_array) 123 | # if all(asc_array == mean_splits): 124 | # growth = True 125 | # elif all(desc_array == mean_splits): 126 | # growth = True 127 | else: 128 | growth = False 129 | if (growth): 130 | self.linear_trend = True 131 | else: 132 | self.linear_trend = False 133 | self.slope = slope * r_value 134 | self.penalty = r_value 135 | self.intercept = intercept 136 | return trend_line 137 | 138 | def get_piecewise(self, y): 139 | self.lbf = LinearBasisFunction(n_changepoints=int(self.n_basis), 140 | decay=self.decay, 141 | weighted=self.weighted) 142 | basis = self.lbf.get_basis(y) 143 | return basis 144 | 145 | def get_harmonics(self, y, seasonal_period): 146 | self.fbf = FourierBasisFunction(int(self.fourier_order), 147 | self.seasonality_weights) 148 | basis = self.fbf.get_harmonics(y, seasonal_period) 149 | return basis 150 | 151 | @staticmethod 152 | def shift(xs, n): 153 | e = np.empty_like(xs) 154 | if n >= 0: 155 | e[:n] = np.nan 156 | e[n:] = xs[:-n] 157 | else: 158 | e[n:] = np.nan 159 | e[:n] = xs[-n:] 160 | return e 161 | 162 | @staticmethod 163 | def moving_average(y, 
window): 164 | y = pd.Series(y.reshape(-1,)) 165 | ma = np.array(y.rolling(window).mean()) 166 | return ma.reshape((-1, 1)) 167 | 168 | def build_input(self, y, exogenous=None): 169 | X = np.arange(len(y)) 170 | X = X.reshape((-1, 1)) 171 | if self.n_basis is not None: 172 | if len(y) <= self.n_basis - 1: 173 | self.n_basis = int(len(y) - 1) 174 | self.basis = self.get_piecewise(y) 175 | X = np.append(X, self.basis, axis=1) 176 | self.component_dict['basis'] = self.basis 177 | if self.seasonal_period: 178 | for period in self.seasonal_period: 179 | harmonics = self.get_harmonics(y, period) 180 | self.component_dict['harmonics ' + str(period)] = harmonics 181 | X = np.append(X, harmonics, axis=1) 182 | if self.exogenous is not None: 183 | X = np.append(X, exogenous, axis=1) 184 | if self.ar is not None: 185 | for ar_order in self.ar: 186 | shifted_y = self.scaled_y.copy() 187 | shifted_y = LazyProphet.shift(shifted_y, ar_order) 188 | X = np.append(X, shifted_y.reshape(-1, 1), axis=1) 189 | if self.ma_windows is not None: 190 | for ma_order in self.ma_windows: 191 | ma = LazyProphet.moving_average(self.scaled_y, ma_order) 192 | X = np.append(X, ma, axis=1) 193 | return X 194 | 195 | def scale_input(self, y): 196 | self.scaler = StandardScaler() 197 | self.scaler.fit(np.asarray(y).reshape(-1, 1)) 198 | self.scaled_y = y.copy() 199 | self.scaled_y = self.scaler.transform(self.scaled_y.reshape(-1, 1)) 200 | 201 | def fit(self, y, X=None): 202 | self.exogenous = X 203 | self.og_y = y 204 | if self.series_features is None: 205 | y = np.array(y) 206 | else: 207 | y = self.series_features 208 | if self.linear_trend is None or self.linear_trend: 209 | fitted_trend = self.linear_test(y) 210 | if self.linear_trend: 211 | y = np.subtract(y, fitted_trend) 212 | #TODO: Should we disable here? 
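        # the commented-out block below would reset self.ar and self.decay
        # whenever a linear trend is detected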
213 | # if self.linear_trend: 214 | # self.ar = None 215 | # self.decay = None 216 | if self.scale: 217 | self.scale_input(y) 218 | else: 219 | self.scaled_y = y.copy() 220 | self.X = self.build_input(self.scaled_y, X) 221 | if self.objective == 'regression': 222 | self.model_obj = gbm.LGBMRegressor(**self.boosting_params) 223 | if self.objective == 'classification': 224 | self.model_obj = gbm.LGBMClassifier(**self.boosting_params) 225 | if self.series_features is None: 226 | self.model_obj.fit(self.X, self.scaled_y.reshape(-1, )) 227 | else: 228 | self.model_obj.fit(self.X, self.og_y.reshape(-1, )) 229 | #commented out from basic feature selection 230 | # self.columns = pd.Series(lp_model.model_obj.feature_importances_).sort_values().index[-100:] 231 | # self.model_obj.fit(self.X[:, self.columns], self.scaled_y.reshape(-1, )) 232 | if self.return_proba: 233 | fitted = self.model_obj.predict_proba(self.X) 234 | fitted = fitted[:, 1].reshape(-1,1) 235 | else: 236 | fitted = self.model_obj.predict(self.X).reshape(-1,1) 237 | if self.scale: 238 | fitted = self.scaler.inverse_transform(fitted) 239 | if self.linear_trend: 240 | fitted = np.add(fitted.reshape(-1,1), fitted_trend.reshape(-1,1)) 241 | return fitted 242 | 243 | def recursive_predict(self, X, forecast_horizon): 244 | self.future_X = X 245 | #TODO: This is just...horrible 246 | predictions = [] 247 | self.full = self.scaled_y.copy() 248 | if self.ar is not None: 249 | self.future_X = np.append(self.future_X, 250 | np.zeros((len(X), len(self.ar))), 251 | axis=1) 252 | if self.ma_windows is not None: 253 | self.future_X = np.append(self.future_X, 254 | np.zeros((len(X), len(self.ma_windows))), 255 | axis=1) 256 | for step in range(forecast_horizon): 257 | if self.ar is not None: 258 | for i, ar_order in enumerate(self.ar): 259 | column_slice = -len(self.ar) + i 260 | if step < ar_order: 261 | self.future_X[step, column_slice] = self.scaled_y[-ar_order + step] 262 | else: 263 | self.future_X[step, column_slice] = predictions[-ar_order] 264 | if self.ma_windows is not None: 265 | for i, ma_window in enumerate(self.ma_windows): 266 | column_slice = -len(self.ma_windows) + i 267 | ma = np.mean(self.full[-ma_window:]) 268 | self.future_X[step, column_slice] = ma 269 | recursive_X = self.future_X[step, :].reshape(1, -1) 270 | if self.return_proba: 271 | predicted = self.model_obj.predict_proba(recursive_X) 272 | predicted = predicted[:, 1] 273 | else: 274 | predicted = self.model_obj.predict(recursive_X) 275 | predictions.append(predicted) 276 | self.full = np.append(self.full, predictions[-1]) 277 | return np.array(predictions) 278 | 279 | def predict(self, forecast_horizon, future_X=None): 280 | X = np.arange(forecast_horizon) + len(self.scaled_y) 281 | X = X.reshape((-1, 1)) 282 | if self.n_basis is not None: 283 | basis = self.lbf.get_future_basis(self.component_dict['basis'], 284 | forecast_horizon, 285 | average=self.decay_average) 286 | X = np.append(X, basis, axis=1) 287 | if self.seasonal_period: 288 | for period in self.seasonal_period: 289 | harmonics = self.component_dict['harmonics ' + str(period)] 290 | future_harmonics = self.fbf.get_future_harmonics(harmonics, 291 | forecast_horizon, 292 | period) 293 | X = np.append(X, future_harmonics, axis=1) 294 | if self.exogenous is not None: 295 | X = np.append(X, future_X, axis=1) 296 | if self.ar is not None or self.ma_windows is not None: 297 | predicted = self.recursive_predict(X, forecast_horizon) 298 | else: 299 | if self.return_proba: 300 | predicted = 
self.model_obj.predict_proba(X) 301 | predicted = predicted[:, 1] 302 | else: 303 | predicted = self.model_obj.predict(X) 304 | predicted = predicted.reshape(-1,1) 305 | if self.scale == True: 306 | predicted = self.scaler.inverse_transform(predicted) 307 | if self.linear_trend: 308 | linear_trend = [i for i in range(0, forecast_horizon)] 309 | linear_trend = np.reshape(linear_trend, (len(linear_trend), 1)) 310 | linear_trend += len(self.scaled_y) + 1 311 | linear_trend = linear_trend**2 312 | if self.trend_penalty: 313 | slope = self.slope*self.penalty 314 | else: 315 | slope = self.slope 316 | linear_trend = np.multiply(linear_trend, slope) + self.intercept 317 | predicted = np.add(predicted, linear_trend.reshape(-1,1)) 318 | return predicted 319 | 320 | @classmethod 321 | def Optimize(cls, y, seasonal_period, n_folds, n_trials=100, test_size=None): 322 | optimizer = Optimize(y, 323 | LazyProphet, 324 | seasonal_period=seasonal_period, 325 | n_trials=n_trials, 326 | n_folds=n_folds, 327 | test_size=test_size) 328 | cls.study = optimizer.fit() 329 | optimized = cls.study.best_params 330 | optimized['ar'] = list(range(1, int(optimized['ar']) + 1)) 331 | optimized['n_estimators'] = int(optimized['n_estimators']) 332 | optimized['num_leaves'] = int(optimized['num_leaves']) 333 | optimized['n_basis'] = int(optimized['n_basis']) 334 | optimized['seasonal_period'] = optimized['seasonal_period'] 335 | # optimized['fourier_order'] = int(optimized['fourier_order']) 336 | return cls(**optimized) 337 | 338 | def init_opt_params(self): 339 | if self.objective == 'regression': 340 | metric = 'rmse' 341 | elif self.objective == 'classification': 342 | metric = 'cross-entropy' 343 | self.opt_params = { 344 | "objective": self.objective, 345 | "metric": metric, 346 | "verbosity": -1, 347 | "boosting_type": "gbdt", 348 | "seed": 42, 349 | 'linear_tree': False, 350 | } 351 | 352 | def tree_optimize(self, y, exogenous=None, cv_splits=3, test_size=None): 353 | self.init_opt_params() 354 | if self.n_basis is not None: 355 | if len(y) <= self.n_basis - 1: 356 | self.n_basis = int(len(y) - 1) 357 | self.exogenous = exogenous 358 | y = np.array(y) 359 | self.og_y = y 360 | if self.linear_trend: 361 | fitted_trend = self.linear_test(y) 362 | y = np.subtract(y, fitted_trend) 363 | # if self.linear_trend: 364 | # self.ar = None 365 | # self.decay = None 366 | if self.scale: 367 | self.scale_input(y) 368 | else: 369 | self.scaled_y = y.copy() 370 | self.X = self.build_input(self.scaled_y) 371 | study_tuner = optuna.create_study(direction='minimize') 372 | dtrain = lgb.Dataset(self.X, label=self.scaled_y) 373 | optuna.logging.set_verbosity(optuna.logging.CRITICAL) 374 | tscv = TimeSeriesSplit(n_splits=cv_splits, test_size=test_size) 375 | tuner = lgb.LightGBMTunerCV(self.opt_params, 376 | dtrain, 377 | study=study_tuner, 378 | verbose_eval=False, 379 | early_stopping_rounds=10, 380 | seed = 42, 381 | folds=tscv, 382 | num_boost_round=500, 383 | show_progress_bar=False 384 | ) 385 | 386 | tuner.run() 387 | best_params = tuner.best_params 388 | self.model_obj = gbm.LGBMRegressor(**best_params) 389 | self.model_obj.fit(self.X, self.scaled_y) 390 | fitted = self.model_obj.predict(self.X).reshape(-1,1) 391 | if self.scale: 392 | fitted = self.scaler.inverse_transform(fitted) 393 | if self.linear_trend: 394 | fitted = np.add(fitted.reshape(-1,1), fitted_trend.reshape(-1,1)) 395 | return fitted 396 | 397 | 398 | 399 | 400 | 401 | -------------------------------------------------------------------------------- 
/build/lib/LazyProphet/LinearBasisFunction.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np


class LinearBasisFunction:

    def __init__(self, n_changepoints, decay=None, weighted=True):
        self.n_changepoints = n_changepoints
        self.decay = decay
        self.weighted = weighted

    def get_basis(self, y):
        y = y.copy()
        y -= y[0]
        mean_y = np.mean(y)
        n_changepoints = self.n_changepoints
        array_splits = np.array_split(np.array(y), n_changepoints + 1)[:-1]
        if self.weighted:
            initial_point = y[0]
            final_point = y[-1]
        else:
            initial_point = 0
            final_point = 0
        changepoints = np.zeros(shape=(len(y), n_changepoints))
        len_splits = 0
        for i in range(n_changepoints):
            len_splits += len(array_splits[i])
            if self.weighted:
                moving_point = array_splits[i][-1]
            else:
                moving_point = 1
            left_basis = np.linspace(initial_point,
                                     moving_point,
                                     len_splits)
            end_point = self.add_decay(moving_point, final_point, mean_y)
            right_basis = np.linspace(moving_point,
                                      end_point,
                                      len(y) - len_splits + 1)
            changepoints[:, i] = np.append(left_basis, right_basis[1:])
        return changepoints

    def add_decay(self, moving_point, final_point, mean_point):
        if self.decay is None:
            return final_point
        if self.decay == 'auto':
            dd = max(.001, min(.99, moving_point**2 / (mean_point**2)))
            return moving_point - ((moving_point - final_point) * (1 - dd))
        return moving_point - ((moving_point - final_point) * (1 - self.decay))

    def get_future_basis(self, basis_functions, forecast_horizon, average=False):
        n_components = np.shape(basis_functions)[1]
        slopes = np.gradient(basis_functions, axis=0)[-1, :]
        future_basis = np.arange(0, forecast_horizon + 1)
        future_basis += len(basis_functions)
        future_basis = np.transpose([future_basis] * n_components)
        future_basis = future_basis * slopes
        future_basis = future_basis + (basis_functions[-1, :] - future_basis[0, :])
        if average:
            future_basis = np.transpose([np.mean(future_basis, axis=1)] * n_components)
        return future_basis[1:, :]


--------------------------------------------------------------------------------
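A quick shape check for the basis class above; a sketch only, with the toy series and changepoint count chosen arbitrarily:

import numpy as np
from LazyProphet.LinearBasisFunction import LinearBasisFunction

y = np.linspace(0, 10, 50) + np.random.normal(0, 0.5, 50)
lbf = LinearBasisFunction(n_changepoints=4, decay='auto')
basis = lbf.get_basis(y)                  # (50, 4): one piecewise-linear column per changepoint
future = lbf.get_future_basis(basis, 12)  # (12, 4): each column extrapolated from its final slope
print(basis.shape, future.shape)

Each column rises toward the series value at its changepoint and then continues toward an (optionally decayed) endpoint, which is what lets the downstream LightGBM model express trend changes.
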
/build/lib/LazyProphet/Optimizer.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import time
import numpy as np
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)


class Optimize:
    def __init__(self,
                 y,
                 lazyprophet_class,
                 seasonal_period=0,
                 n_folds=3,
                 test_size=None,
                 n_trials=100):
        self.y = y
        self.lazyprophet_class = lazyprophet_class
        if isinstance(seasonal_period, list):
            self.max_pulse = max(seasonal_period)
        else:
            self.max_pulse = seasonal_period
        self.seasonal_period = seasonal_period
        self.n_folds = n_folds
        self.test_size = test_size
        self.n_trials = n_trials

    def logic_layer(self):
        # disable seasonality when the training window cannot cover the longest period
        n_samples = len(self.y)
        test_size = n_samples // (self.n_folds + 1)
        if n_samples - test_size < self.max_pulse:
            self.seasonal_period = 0

    def scorer(self, model_obj, y, metric, cv):
        cv_splits = cv.split(y)
        mses = []
        for train_index, test_index in cv_splits:
            try:
                model_obj.fit(y[train_index])
                predicted = model_obj.predict(len(y[test_index]))
                mses.append(mean_squared_error(y[test_index], predicted))
            except Exception:
                # score a failed fit/predict as infinitely bad instead of killing the study
                mses.append(np.inf)
        return np.mean(mses)

    def objective(self, trial):
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 25, 500),
            "num_leaves": trial.suggest_int("num_leaves", 8, 128),
            "n_basis": trial.suggest_int("n_basis", 0, 15),
            "decay": trial.suggest_categorical("decay", ['auto',
                                                         .05,
                                                         .1,
                                                         .25,
                                                         .5,
                                                         .75,
                                                         .9,
                                                         .99]),
        }
        if self.seasonal_period:
            params['seasonal_period'] = trial.suggest_categorical("seasonal_period",
                                                                  [None, self.seasonal_period])
            params['ar'] = trial.suggest_int("ar", 0, self.max_pulse)
        else:
            params['ar'] = trial.suggest_int("ar", 0, 4)
        params['ar'] = list(range(1, 1 + params['ar']))
        clf = self.lazyprophet_class(**params)
        score = self.scorer(clf, self.y, mean_squared_error,
                            TimeSeriesSplit(self.n_folds, test_size=self.test_size))
        return score

    def fit(self):
        study = optuna.create_study(direction="minimize")
        study.optimize(self.objective, n_trials=self.n_trials)
        return study


--------------------------------------------------------------------------------
/build/lib/LazyProphet/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

--------------------------------------------------------------------------------
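A sketch of driving Optimize directly; in practice the LazyProphet.Optimize classmethod shown earlier wraps this and casts the raw Optuna params before constructing the model. The toy series and trial count here are illustrative assumptions:

import numpy as np
from LazyProphet.LazyProphet import LazyProphet
from LazyProphet.Optimizer import Optimize

y = 50 + np.cumsum(np.random.normal(0, 1, 120))  # any 1-D numpy series

optimizer = Optimize(y,
                     LazyProphet,
                     seasonal_period=12,  # a falsy value skips the seasonal search branch
                     n_folds=3,
                     n_trials=20)
study = optimizer.fit()
print(study.best_params)  # raw Optuna params; the classmethod casts these before use
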
/dist/LazyProphet-0.3.3-py3-none-any.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/dist/LazyProphet-0.3.3-py3-none-any.whl
--------------------------------------------------------------------------------
/dist/LazyProphet-0.3.4-py3-none-any.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/dist/LazyProphet-0.3.4-py3-none-any.whl
--------------------------------------------------------------------------------
/dist/LazyProphet-0.3.5-py3-none-any.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/dist/LazyProphet-0.3.5-py3-none-any.whl
--------------------------------------------------------------------------------
/dist/LazyProphet-0.3.6-py3-none-any.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/dist/LazyProphet-0.3.6-py3-none-any.whl
--------------------------------------------------------------------------------
/dist/LazyProphet-0.3.7-py3-none-any.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/dist/LazyProphet-0.3.7-py3-none-any.whl
--------------------------------------------------------------------------------
/dist/LazyProphet-0.3.8-py3-none-any.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tblume1992/LazyProphet/710218db95252c061c72ddf3a2ee64a5b9992384/dist/LazyProphet-0.3.8-py3-none-any.whl
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
@author: Tyler Blume
"""

import setuptools

with open("README.md", "r") as fh:
    long_description = fh.read()

setuptools.setup(
    name="LazyProphet",
    version="0.3.8",
    author="Tyler Blume",
    url="https://github.com/tblume1992/LazyProphet",
    long_description=long_description,
    long_description_content_type="text/markdown",
    description="Time series forecasting with LightGBM",
    author_email='tblume@mail.USF.edu',
    keywords=['forecasting', 'time series', 'lightgbm'],
    install_requires=[
        'numpy',
        'pandas',
        'statsmodels',
        'scikit-learn',
        'optuna',
        'scipy',
        'matplotlib',
        'lightgbm'
    ],
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)
--------------------------------------------------------------------------------
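To close the loop, a hedged end-to-end sketch combining the tuner and the model; the series, seasonal period, and trial count are illustrative assumptions, not documented usage:

import numpy as np
from LazyProphet.LazyProphet import LazyProphet

y = 100 + np.cumsum(np.random.normal(0, 1, 96))

# the classmethod defined in LazyProphet.py above: runs the Optuna study,
# casts the winning params, and returns a configured instance
lp_model = LazyProphet.Optimize(y, seasonal_period=12, n_folds=3, n_trials=50)
fitted = lp_model.fit(y)
forecast = lp_model.predict(24)

The package itself installs from PyPI with pip install LazyProphet, matching the metadata in setup.py above.
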