├── .gitignore ├── LICENSE.md ├── README.md ├── data └── ga_sessions.csv ├── ideation.txt ├── requirements-no-vers.txt ├── requirements.txt ├── setup.py ├── src └── forecastga │ ├── __init__.py │ ├── auto.py │ ├── ensembles.py │ ├── ga │ ├── __init__.py │ ├── account.py │ ├── auth │ │ ├── __init__.py │ │ ├── credentials.py │ │ └── oauth.py │ ├── blueprint.py │ ├── columns.py │ ├── errors.py │ ├── query.py │ ├── realtime.yml │ ├── segments.py │ └── utils │ │ ├── __init__.py │ │ ├── date.py │ │ ├── functional.py │ │ ├── server.py │ │ └── string.py │ ├── helpers │ ├── colab.py │ ├── data.py │ ├── ga_data.py │ ├── holidays.py │ ├── logging.py │ └── ssa.py │ ├── models │ ├── __init__.py │ ├── arima.py │ ├── base.py │ ├── gluonts.py │ ├── hwaas.py │ ├── hwams.py │ ├── nbeats.py │ ├── prophet.py │ ├── prophet_boxcox.py │ ├── pyaf.py │ ├── sarima.py │ ├── sarimax.py │ ├── tats.py │ ├── tbat.py │ ├── tbatp1.py │ ├── tbats1.py │ ├── tbats2.py │ └── template.py │ └── stan │ ├── unix │ └── prophet.stan │ └── win │ └── prophet.stan ├── test.py └── tests └── googleanalytics ├── __init__.py ├── base.py ├── meta.py ├── query.py └── report.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | pip-wheel-metadata/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | /src/*.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | etc/ 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # pytype static type analyzer 137 | .pytype/ 138 | 139 | # Cython debug symbols 140 | cython_debug/ 141 | 142 | 143 | # static files generated from Django application using `collectstatic` 144 | media 145 | static 146 | 147 | # Local 148 | *.log 149 | *.yaml 150 | identity.json 151 | src/test.py 152 | not_used/ 153 | pyproject.toml 154 | *.lock 155 | .bumpversion.cfg 156 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | ===================== 3 | 4 | Copyright © 2020 JR Oakes 5 | 6 | Permission is hereby granted, free of charge, to any person 7 | obtaining a copy of this software and associated documentation 8 | files (the “Software”), to deal in the Software without 9 | restriction, including without limitation the rights to use, 10 | copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the 12 | Software is furnished to do so, subject to the following 13 | conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, 19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 | OTHER DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ForecastGA 2 | A Python tool to forecast GA data using several popular time series models. 3 | 4 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1nmcu37MY02dfMdUbinrwwg7gA9ya3eud?usp=sharing) 5 | 6 | ![Logo for ForecastGA](https://repository-images.githubusercontent.com/325152404/637c0780-5713-11eb-9fc0-196d71fe0f6f) 7 | 8 | ## About 9 | 10 | ### Welcome to ForecastGA 11 | 12 | ForecastGA is a tool that combines a couple of popular libraries, [Atspy](https://github.com/firmai/atspy) and [googleanalytics](https://github.com/debrouwere/google-analytics), with a few enhancements. 13 | 14 | * The models are made more intuitive to upgrade and add by having the tool logic separate from the model training and prediction. 15 | * When calling `am.forecast_insample()`, any kwargs included (e.g. `learning_rate`) are passed to the train method of the model. 16 | * Google Analytics profiles are specified by simply passing the URL (e.g. 
https://analytics.google.com/analytics/web/?authuser=2#/report-home/aXXXXXwXXXXXpXXXXXX). 17 | * You can provide a `data` dict with GA config options or a Pandas Series as the input data. 18 | * Multiple log levels. 19 | * Auto GPU detection (via Torch). 20 | * List all available models, with descriptions, by calling `forecastga.print_model_info()`. 21 | * Google API info can be passed in the `data` dict or uploaded as a JSON file named `identity.json`. 22 | * Created a companion Google Colab notebook to easily run on GPU. 23 | * A handy plot function for Colab, `forecastga.plot_colab(forecast_in, title="Insample Forecast", dark_mode=True)`, that formats nicely and also handles Dark Mode! 24 | 25 | ### Models Available 26 | * `ARIMA` : Automated ARIMA Modeling 27 | * `Prophet` : Modeling Multiple Seasonality With Linear or Non-linear Growth 28 | * `ProphetBC` : Prophet Model with Box-Cox transform of the data 29 | * `HWAAS` : Exponential Smoothing With Additive Trend and Additive Seasonality 30 | * `HWAMS` : Exponential Smoothing with Additive Trend and Multiplicative Seasonality 31 | * `NBEATS` : Neural basis expansion analysis (now fixed at 20 Epochs) 32 | * `Gluonts` : RNN-based Model (now fixed at 20 Epochs) 33 | * `TATS` : Seasonal and Trend, no Box-Cox 34 | * `TBAT` : Trend and Box-Cox 35 | * `TBATS1` : Trend, Seasonal (one), and Box-Cox 36 | * `TBATP1` : TBATS1 but Seasonal Inference is Hardcoded by Periodicity 37 | * `TBATS2` : TBATS1 With Two Seasonal Periods 38 | 39 | ### Models to be added 40 | * [Greykite](https://engineering.linkedin.com/blog/2021/greykite--a-flexible--intuitive--and-fast-forecasting-library) 41 | * [Kats](https://facebookresearch.github.io/Kats/) 42 | 43 | 44 | 45 | 46 | ### How To Use 47 | 48 | #### Find Model Info: 49 | `forecastga.print_model_info()` 50 | 51 | #### Initialize Model: 52 | 53 | ##### Google Analytics: 54 | 55 | ``` 56 | data = { 'client_id': '', 57 | 'client_secret': '', 58 | 'identity': '', 59 | 'ga_start_date': '2018-01-01', 60 | 'ga_end_date': '2019-12-31', 61 | 'ga_metric': 'sessions', 62 | 'ga_segment': 'organic traffic', 63 | 'ga_url': 'https://analytics.google.com/analytics/web/?authuser=2#/report-home/aXXXXXwXXXXXpXXXXXX', 64 | 'omit_values_over': 2000000 65 | } 66 | 67 | model_list = ["TATS", "TBATS1", "TBATP1", "TBATS2", "ARIMA"] 68 | am = forecastga.AutomatedModel(data, model_list=model_list, forecast_len=30) 69 | ``` 70 | 71 | ##### Pandas DataFrame: 72 | 73 | ``` 74 | # CSV with columns: Date and Sessions 75 | df = pd.read_csv('ga_sessions.csv') 76 | df.Date = pd.to_datetime(df.Date) 77 | df = df.set_index("Date") 78 | data = df.Sessions 79 | 80 | model_list = ["TATS", "TBATS1", "TBATP1", "TBATS2", "ARIMA"] 81 | am = forecastga.AutomatedModel(data, model_list=model_list, forecast_len=30) 82 | ``` 83 | 84 | #### Forecast Insample: 85 | `forecast_in, performance = am.forecast_insample()` 86 | 87 | #### Forecast Outsample: 88 | `forecast_out = am.forecast_outsample()` 89 | 90 | #### Ensemble Performance: 91 | `all_ensemble_in, all_ensemble_out, all_performance = am.ensemble(forecast_in, forecast_out)` 92 | 93 | #### Pretty Plot in Google Colab 94 | `forecastga.plot_colab(forecast_in, title="Insample Forecast", dark_mode=True)` 95 | 96 | 97 | # Installation 98 | Windows users may need to manually install the first two items below via conda, then install the package from GitHub: 99 | 1. `conda install pystan` 100 | 1. `conda install pytorch -c pytorch` 101 | 1.
`pip install --upgrade git+https://github.com/jroakes/ForecastGA.git` 102 | 103 | Otherwise, 104 | `pip install --upgrade forecastga` 105 | 106 | This repo supports GPU training. Below are a few libraries that may have to be installed manually to support it. 107 | ``` 108 | pip install --upgrade mxnet-cu101 109 | pip install --upgrade torch==1.7.0+cu101 110 | ``` 111 | 112 | 113 | ## Acknowledgements 114 | 115 | 1. The majority of the forecasting code was taken from https://github.com/firmai/atspy and heavily refactored. 116 | 1. Google Analytics support is based on: https://github.com/debrouwere/google-analytics 117 | 1. Thanks to [richardfergie](https://github.com/richardfergie) for the addition of the Prophet Box-Cox model to control negative predictions. 118 | 119 | ## Contribute 120 | The goal of this repo is to grow the list of available models to test. If you would like to contribute one, please read on. Feel free to have fun naming your models. 121 | 122 | 1. Fork the repo. 123 | 2. In the `/src/forecastga/models` folder there is a model called `template.py`. You can use this as a template for creating your new model. All available variables are there. ForecastGA ensures each model has the right data and calls only the `train` and `forecast` methods for each model. Feel free to add additional methods that your model requires (see the sketch after this list). 124 | 3. Edit the `/src/forecastga/models/__init__.py` file to add your model's information. Follow the format of the other entries. ForecastGA relies on `loc` to find the model and `class` to find the class to use. 125 | 4. Edit `requirements.txt` with any additional libraries needed to run your model. Keep in mind that this repo should support GPU training if available, and some libraries have separate GPU-enabled versions. 126 | 5. Issue a pull request.
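To make the contribution steps concrete, here is a minimal sketch of what a contributed model might look like. It is illustrative only: the base class and attribute names used here (`BaseModel`, `self.dataframe`, `self.forecast_len`, `self.prediction`) are assumptions for this example, so copy the real ones from `template.py` rather than from here.

```
# mymodel.py -- a hypothetical ForecastGA model (names are assumptions;
# follow template.py for the real base class and attribute names).
from statsmodels.tsa.holtwinters import ExponentialSmoothing

from forecastga.models.base import BaseModel  # assumed class name in models/base.py


class MyModel(BaseModel):
    """Simple exponential smoothing, for illustration only.

    ForecastGA calls only train() and forecast(); any other helper
    methods are yours to add.
    """

    def train(self, **kwargs):
        # kwargs passed to am.forecast_insample() (e.g. learning_rate)
        # arrive here, so expose any tunables as keyword arguments.
        self.model = ExponentialSmoothing(self.dataframe, trend="add").fit()

    def forecast(self):
        # Predict forecast_len periods beyond the training data.
        self.prediction = self.model.forecast(self.forecast_len)
```

The matching entry in `/src/forecastga/models/__init__.py` would point `loc` at the new module and `class` at the class name; copy the exact field layout from an existing entry there.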
127 | 128 | If you enjoyed this tool consider buying me some beer at: [Paypalme](https://www.paypal.com/paypalme/codeseo) 129 | -------------------------------------------------------------------------------- /data/ga_sessions.csv: -------------------------------------------------------------------------------- 1 | Date,Target 2 | 2016-01-01,2584.0 3 | 2016-01-02,3352.0 4 | 2016-01-03,2863.0 5 | 2016-01-04,6076.0 6 | 2016-01-05,4539.0 7 | 2016-01-06,4539.0 8 | 2016-01-07,5377.0 9 | 2016-01-08,5028.0 10 | 2016-01-09,3492.0 11 | 2016-01-10,2724.0 12 | 2016-01-11,5796.0 13 | 2016-01-12,5657.0 14 | 2016-01-13,5517.0 15 | 2016-01-14,6704.0 16 | 2016-01-15,6355.0 17 | 2016-01-16,3911.0 18 | 2016-01-17,2863.0 19 | 2016-01-18,5098.0 20 | 2016-01-19,5517.0 21 | 2016-01-20,4819.0 22 | 2016-01-21,6634.0 23 | 2016-01-22,5657.0 24 | 2016-01-23,3492.0 25 | 2016-01-24,2724.0 26 | 2016-01-25,6285.0 27 | 2016-01-26,6355.0 28 | 2016-01-27,5796.0 29 | 2016-01-28,6006.0 30 | 2016-01-29,6146.0 31 | 2016-01-30,4051.0 32 | 2016-01-31,3771.0 33 | 2016-02-01,6704.0 34 | 2016-02-02,5377.0 35 | 2016-02-03,5028.0 36 | 2016-02-04,6495.0 37 | 2016-02-05,5447.0 38 | 2016-02-06,3212.0 39 | 2016-02-07,3282.0 40 | 2016-02-08,5098.0 41 | 2016-02-09,4749.0 42 | 2016-02-10,5447.0 43 | 2016-02-11,5028.0 44 | 2016-02-12,5796.0 45 | 2016-02-13,4120.0 46 | 2016-02-14,3143.0 47 | 2016-02-15,6076.0 48 | 2016-02-16,6914.0 49 | 2016-02-17,6495.0 50 | 2016-02-18,6634.0 51 | 2016-02-19,5308.0 52 | 2016-02-20,3562.0 53 | 2016-02-21,2724.0 54 | 2016-02-22,5866.0 55 | 2016-02-23,5587.0 56 | 2016-02-24,5238.0 57 | 2016-02-25,6076.0 58 | 2016-02-26,5936.0 59 | 2016-02-27,3352.0 60 | 2016-02-28,2584.0 61 | 2016-02-29,6076.0 62 | 2016-03-01,6774.0 63 | 2016-03-02,5377.0 64 | 2016-03-03,6634.0 65 | 2016-03-04,5168.0 66 | 2016-03-05,4470.0 67 | 2016-03-06,3073.0 68 | 2016-03-07,5657.0 69 | 2016-03-08,5308.0 70 | 2016-03-09,4958.0 71 | 2016-03-10,6425.0 72 | 2016-03-11,5308.0 73 | 2016-03-12,3911.0 74 | 2016-03-13,3003.0 75 | 2016-03-14,5657.0 76 | 2016-03-15,5657.0 77 | 2016-03-16,6425.0 78 | 2016-03-17,6565.0 79 | 2016-03-18,5238.0 80 | 2016-03-19,3562.0 81 | 2016-03-20,3073.0 82 | 2016-03-21,6076.0 83 | 2016-03-22,6634.0 84 | 2016-03-23,5308.0 85 | 2016-03-24,6844.0 86 | 2016-03-25,4051.0 87 | 2016-03-26,3771.0 88 | 2016-03-27,2235.0 89 | 2016-03-28,6634.0 90 | 2016-03-29,5657.0 91 | 2016-03-30,4958.0 92 | 2016-03-31,6634.0 93 | 2016-04-01,5028.0 94 | 2016-04-02,3701.0 95 | 2016-04-03,2933.0 96 | 2016-04-04,5866.0 97 | 2016-04-05,5308.0 98 | 2016-04-06,4539.0 99 | 2016-04-07,5796.0 100 | 2016-04-08,5377.0 101 | 2016-04-09,3701.0 102 | 2016-04-10,3212.0 103 | 2016-04-11,5098.0 104 | 2016-04-12,5028.0 105 | 2016-04-13,4958.0 106 | 2016-04-14,5866.0 107 | 2016-04-15,5377.0 108 | 2016-04-16,4609.0 109 | 2016-04-17,8101.0 110 | 2016-04-18,5796.0 111 | 2016-04-19,5587.0 112 | 2016-04-20,5098.0 113 | 2016-04-21,5168.0 114 | 2016-04-22,5028.0 115 | 2016-04-23,2724.0 116 | 2016-04-24,2095.0 117 | 2016-04-25,5238.0 118 | 2016-04-26,4819.0 119 | 2016-04-27,5098.0 120 | 2016-04-28,5517.0 121 | 2016-04-29,4470.0 122 | 2016-04-30,3143.0 123 | 2016-05-01,2444.0 124 | 2016-05-02,5517.0 125 | 2016-05-03,4539.0 126 | 2016-05-04,4958.0 127 | 2016-05-05,5447.0 128 | 2016-05-06,4889.0 129 | 2016-05-07,2863.0 130 | 2016-05-08,2095.0 131 | 2016-05-09,4889.0 132 | 2016-05-10,4819.0 133 | 2016-05-11,4958.0 134 | 2016-05-12,5377.0 135 | 2016-05-13,4819.0 136 | 2016-05-14,2374.0 137 | 2016-05-15,2444.0 138 | 2016-05-16,4190.0 139 | 2016-05-17,3562.0 140 | 
2016-05-18,3701.0 141 | 2016-05-19,3701.0 142 | 2016-05-20,3562.0 143 | 2016-05-21,3212.0 144 | 2016-05-22,2793.0 145 | 2016-05-23,5587.0 146 | 2016-05-24,4889.0 147 | 2016-05-25,3771.0 148 | 2016-05-26,4679.0 149 | 2016-05-27,4539.0 150 | 2016-05-28,3422.0 151 | 2016-05-29,1606.0 152 | 2016-05-30,2933.0 153 | 2016-05-31,4889.0 154 | 2016-06-01,5657.0 155 | 2016-06-02,6565.0 156 | 2016-06-03,5098.0 157 | 2016-06-04,3771.0 158 | 2016-06-05,2654.0 159 | 2016-06-06,5308.0 160 | 2016-06-07,5168.0 161 | 2016-06-08,4051.0 162 | 2016-06-09,4889.0 163 | 2016-06-10,5098.0 164 | 2016-06-11,2724.0 165 | 2016-06-12,2444.0 166 | 2016-06-13,5377.0 167 | 2016-06-14,5168.0 168 | 2016-06-15,5657.0 169 | 2016-06-16,5447.0 170 | 2016-06-17,4958.0 171 | 2016-06-18,2933.0 172 | 2016-06-19,2025.0 173 | 2016-06-20,6076.0 174 | 2016-06-21,5098.0 175 | 2016-06-22,6146.0 176 | 2016-06-23,5377.0 177 | 2016-06-24,3771.0 178 | 2016-06-25,3073.0 179 | 2016-06-26,1886.0 180 | 2016-06-27,5587.0 181 | 2016-06-28,5517.0 182 | 2016-06-29,6146.0 183 | 2016-06-30,6704.0 184 | 2016-07-01,6215.0 185 | 2016-07-02,3003.0 186 | 2016-07-03,2165.0 187 | 2016-07-04,2374.0 188 | 2016-07-05,5727.0 189 | 2016-07-06,4958.0 190 | 2016-07-07,6355.0 191 | 2016-07-08,4330.0 192 | 2016-07-09,3003.0 193 | 2016-07-10,2095.0 194 | 2016-07-11,4958.0 195 | 2016-07-12,4330.0 196 | 2016-07-13,4260.0 197 | 2016-07-14,5727.0 198 | 2016-07-15,5308.0 199 | 2016-07-16,3073.0 200 | 2016-07-17,3352.0 201 | 2016-07-18,6006.0 202 | 2016-07-19,5657.0 203 | 2016-07-20,5028.0 204 | 2016-07-21,5727.0 205 | 2016-07-22,4330.0 206 | 2016-07-23,2793.0 207 | 2016-07-24,2584.0 208 | 2016-07-25,6146.0 209 | 2016-07-26,5866.0 210 | 2016-07-27,5028.0 211 | 2016-07-28,5866.0 212 | 2016-07-29,5587.0 213 | 2016-07-30,3073.0 214 | 2016-07-31,2514.0 215 | 2016-08-01,6774.0 216 | 2016-08-02,5377.0 217 | 2016-08-03,5098.0 218 | 2016-08-04,5866.0 219 | 2016-08-05,5866.0 220 | 2016-08-06,2933.0 221 | 2016-08-07,3073.0 222 | 2016-08-08,6076.0 223 | 2016-08-09,5098.0 224 | 2016-08-10,5238.0 225 | 2016-08-11,5936.0 226 | 2016-08-12,5866.0 227 | 2016-08-13,3352.0 228 | 2016-08-14,4120.0 229 | 2016-08-15,6215.0 230 | 2016-08-16,5028.0 231 | 2016-08-17,4470.0 232 | 2016-08-18,5936.0 233 | 2016-08-19,5727.0 234 | 2016-08-20,3422.0 235 | 2016-08-21,2724.0 236 | 2016-08-22,5028.0 237 | 2016-08-23,4889.0 238 | 2016-08-24,4819.0 239 | 2016-08-25,5727.0 240 | 2016-08-26,5727.0 241 | 2016-08-27,3143.0 242 | 2016-08-28,1886.0 243 | 2016-08-29,4958.0 244 | 2016-08-30,5796.0 245 | 2016-08-31,6006.0 246 | 2016-09-01,6285.0 247 | 2016-09-02,4609.0 248 | 2016-09-03,3492.0 249 | 2016-09-04,2235.0 250 | 2016-09-05,3212.0 251 | 2016-09-06,5936.0 252 | 2016-09-07,5377.0 253 | 2016-09-08,5098.0 254 | 2016-09-09,5936.0 255 | 2016-09-10,3771.0 256 | 2016-09-11,2374.0 257 | 2016-09-12,5308.0 258 | 2016-09-13,3911.0 259 | 2016-09-14,4400.0 260 | 2016-09-15,7472.0 261 | 2016-09-16,5587.0 262 | 2016-09-17,2724.0 263 | 2016-09-18,3422.0 264 | 2016-09-19,5517.0 265 | 2016-09-20,4470.0 266 | 2016-09-21,4679.0 267 | 2016-09-22,6285.0 268 | 2016-09-23,5936.0 269 | 2016-09-24,2933.0 270 | 2016-09-25,2305.0 271 | 2016-09-26,6006.0 272 | 2016-09-27,5098.0 273 | 2016-09-28,5238.0 274 | 2016-09-29,6076.0 275 | 2016-09-30,5168.0 276 | 2016-10-01,3073.0 277 | 2016-10-02,2514.0 278 | 2016-10-03,6006.0 279 | 2016-10-04,3981.0 280 | 2016-10-05,4749.0 281 | 2016-10-06,5238.0 282 | 2016-10-07,4889.0 283 | 2016-10-08,2584.0 284 | 2016-10-09,2514.0 285 | 2016-10-10,4819.0 286 | 2016-10-11,5098.0 287 | 2016-10-12,4889.0 288 | 
2016-10-13,6146.0 289 | 2016-10-14,4400.0 290 | 2016-10-15,3212.0 291 | 2016-10-16,2095.0 292 | 2016-10-17,4819.0 293 | 2016-10-18,3981.0 294 | 2016-10-19,5308.0 295 | 2016-10-20,5168.0 296 | 2016-10-21,4051.0 297 | 2016-10-22,3003.0 298 | 2016-10-23,2514.0 299 | 2016-10-24,5028.0 300 | 2016-10-25,4260.0 301 | 2016-10-26,4539.0 302 | 2016-10-27,5098.0 303 | 2016-10-28,4749.0 304 | 2016-10-29,3282.0 305 | 2016-10-30,3352.0 306 | 2016-10-31,5238.0 307 | 2016-11-01,4749.0 308 | 2016-11-02,4889.0 309 | 2016-11-03,5168.0 310 | 2016-11-04,5238.0 311 | 2016-11-05,3073.0 312 | 2016-11-06,2793.0 313 | 2016-11-07,4889.0 314 | 2016-11-08,4400.0 315 | 2016-11-09,4470.0 316 | 2016-11-10,5657.0 317 | 2016-11-11,4609.0 318 | 2016-11-12,3771.0 319 | 2016-11-13,2724.0 320 | 2016-11-14,5936.0 321 | 2016-11-15,5447.0 322 | 2016-11-16,5098.0 323 | 2016-11-17,5727.0 324 | 2016-11-18,5098.0 325 | 2016-11-19,2654.0 326 | 2016-11-20,2654.0 327 | 2016-11-21,5517.0 328 | 2016-11-22,5866.0 329 | 2016-11-23,4679.0 330 | 2016-11-24,2165.0 331 | 2016-11-25,4539.0 332 | 2016-11-26,3073.0 333 | 2016-11-27,2863.0 334 | 2016-11-28,5098.0 335 | 2016-11-29,5657.0 336 | 2016-11-30,7333.0 337 | 2016-12-01,6495.0 338 | 2016-12-02,4819.0 339 | 2016-12-03,3771.0 340 | 2016-12-04,2305.0 341 | 2016-12-05,5168.0 342 | 2016-12-06,4819.0 343 | 2016-12-07,5727.0 344 | 2016-12-08,5796.0 345 | 2016-12-09,5168.0 346 | 2016-12-10,3422.0 347 | 2016-12-11,3003.0 348 | 2016-12-12,6285.0 349 | 2016-12-13,6774.0 350 | 2016-12-14,6215.0 351 | 2016-12-15,6006.0 352 | 2016-12-16,5308.0 353 | 2016-12-17,4749.0 354 | 2016-12-18,3143.0 355 | 2016-12-19,5168.0 356 | 2016-12-20,5028.0 357 | 2016-12-21,4539.0 358 | 2016-12-22,6355.0 359 | 2016-12-23,4470.0 360 | 2016-12-24,2095.0 361 | 2016-12-25,2374.0 362 | 2016-12-26,4749.0 363 | 2016-12-27,5866.0 364 | 2016-12-28,7333.0 365 | 2016-12-29,6914.0 366 | 2016-12-30,5936.0 367 | 2016-12-31,3981.0 368 | 2017-01-01,3771.0 369 | 2017-01-02,5727.0 370 | 2017-01-03,7123.0 371 | 2017-01-04,5936.0 372 | 2017-01-05,7263.0 373 | 2017-01-06,5866.0 374 | 2017-01-07,3771.0 375 | 2017-01-08,3282.0 376 | 2017-01-09,5936.0 377 | 2017-01-10,6425.0 378 | 2017-01-11,7333.0 379 | 2017-01-12,6355.0 380 | 2017-01-13,6006.0 381 | 2017-01-14,4120.0 382 | 2017-01-15,4051.0 383 | 2017-01-16,6704.0 384 | 2017-01-17,6914.0 385 | 2017-01-18,5447.0 386 | 2017-01-19,8101.0 387 | 2017-01-20,5796.0 388 | 2017-01-21,4470.0 389 | 2017-01-22,2793.0 390 | 2017-01-23,5796.0 391 | 2017-01-24,5727.0 392 | 2017-01-25,5308.0 393 | 2017-01-26,6914.0 394 | 2017-01-27,6146.0 395 | 2017-01-28,3701.0 396 | 2017-01-29,3422.0 397 | 2017-01-30,6355.0 398 | 2017-01-31,6634.0 399 | 2017-02-01,7682.0 400 | 2017-02-02,7403.0 401 | 2017-02-03,6285.0 402 | 2017-02-04,4120.0 403 | 2017-02-05,3911.0 404 | 2017-02-06,5238.0 405 | 2017-02-07,6076.0 406 | 2017-02-08,6355.0 407 | 2017-02-09,6914.0 408 | 2017-02-10,5866.0 409 | 2017-02-11,3492.0 410 | 2017-02-12,3422.0 411 | 2017-02-13,6006.0 412 | 2017-02-14,5866.0 413 | 2017-02-15,6146.0 414 | 2017-02-16,6634.0 415 | 2017-02-17,5377.0 416 | 2017-02-18,4400.0 417 | 2017-02-19,3212.0 418 | 2017-02-20,5028.0 419 | 2017-02-21,5866.0 420 | 2017-02-22,8590.0 421 | 2017-02-23,7193.0 422 | 2017-02-24,5587.0 423 | 2017-02-25,3422.0 424 | 2017-02-26,3701.0 425 | 2017-02-27,6146.0 426 | 2017-02-28,7403.0 427 | 2017-03-01,7053.0 428 | 2017-03-02,6076.0 429 | 2017-03-03,6495.0 430 | 2017-03-04,3701.0 431 | 2017-03-05,2654.0 432 | 2017-03-06,5657.0 433 | 2017-03-07,4679.0 434 | 2017-03-08,6076.0 435 | 2017-03-09,5447.0 436 | 
2017-03-10,5517.0 437 | 2017-03-11,4470.0 438 | 2017-03-12,2793.0 439 | 2017-03-13,4889.0 440 | 2017-03-14,5238.0 441 | 2017-03-15,5936.0 442 | 2017-03-16,4400.0 443 | 2017-03-17,4539.0 444 | 2017-03-18,3143.0 445 | 2017-03-19,2654.0 446 | 2017-03-20,5168.0 447 | 2017-03-21,5447.0 448 | 2017-03-22,5727.0 449 | 2017-03-23,6355.0 450 | 2017-03-24,5866.0 451 | 2017-03-25,3492.0 452 | 2017-03-26,2863.0 453 | 2017-03-27,6844.0 454 | 2017-03-28,8031.0 455 | 2017-03-29,7891.0 456 | 2017-03-30,6146.0 457 | 2017-03-31,5796.0 458 | 2017-04-01,3492.0 459 | 2017-04-02,3562.0 460 | 2017-04-03,6774.0 461 | 2017-04-04,4958.0 462 | 2017-04-05,4958.0 463 | 2017-04-06,5657.0 464 | 2017-04-07,4819.0 465 | 2017-04-08,2793.0 466 | 2017-04-09,1886.0 467 | 2017-04-10,4190.0 468 | 2017-04-11,4749.0 469 | 2017-04-12,5028.0 470 | 2017-04-13,5727.0 471 | 2017-04-14,4889.0 472 | 2017-04-15,3003.0 473 | 2017-04-16,1955.0 474 | 2017-04-17,5866.0 475 | 2017-04-18,5517.0 476 | 2017-04-19,5168.0 477 | 2017-04-20,5028.0 478 | 2017-04-21,4749.0 479 | 2017-04-22,2444.0 480 | 2017-04-23,2514.0 481 | 2017-04-24,4889.0 482 | 2017-04-25,3352.0 483 | 2017-04-26,4051.0 484 | 2017-04-27,5168.0 485 | 2017-04-28,5238.0 486 | 2017-04-29,2514.0 487 | 2017-04-30,1746.0 488 | 2017-05-01,5587.0 489 | 2017-05-02,6285.0 490 | 2017-05-03,5936.0 491 | 2017-05-04,5168.0 492 | 2017-05-05,3701.0 493 | 2017-05-06,2793.0 494 | 2017-05-07,2025.0 495 | 2017-05-08,4819.0 496 | 2017-05-09,5028.0 497 | 2017-05-10,4819.0 498 | 2017-05-11,4958.0 499 | 2017-05-12,4539.0 500 | 2017-05-13,2374.0 501 | 2017-05-14,2863.0 502 | 2017-05-15,5238.0 503 | 2017-05-16,5587.0 504 | 2017-05-17,4470.0 505 | 2017-05-18,4889.0 506 | 2017-05-19,4260.0 507 | 2017-05-20,2025.0 508 | 2017-05-21,2444.0 509 | 2017-05-22,4889.0 510 | 2017-05-23,5727.0 511 | 2017-05-24,3631.0 512 | 2017-05-25,5447.0 513 | 2017-05-26,3911.0 514 | 2017-05-27,2514.0 515 | 2017-05-28,1467.0 516 | 2017-05-29,2724.0 517 | 2017-05-30,4819.0 518 | 2017-05-31,4120.0 519 | 2017-06-01,6215.0 520 | 2017-06-02,4609.0 521 | 2017-06-03,2584.0 522 | 2017-06-04,2584.0 523 | 2017-06-05,4819.0 524 | 2017-06-06,4330.0 525 | 2017-06-07,4400.0 526 | 2017-06-08,4470.0 527 | 2017-06-09,4609.0 528 | 2017-06-10,2025.0 529 | 2017-06-11,1257.0 530 | 2017-06-12,4609.0 531 | 2017-06-13,3073.0 532 | 2017-06-14,3981.0 533 | 2017-06-15,4120.0 534 | 2017-06-16,4400.0 535 | 2017-06-17,2514.0 536 | 2017-06-18,2025.0 537 | 2017-06-19,4609.0 538 | 2017-06-20,3771.0 539 | 2017-06-21,4120.0 540 | 2017-06-22,4749.0 541 | 2017-06-23,3911.0 542 | 2017-06-24,2165.0 543 | 2017-06-25,2235.0 544 | 2017-06-26,5238.0 545 | 2017-06-27,4190.0 546 | 2017-06-28,4400.0 547 | 2017-06-29,5028.0 548 | 2017-06-30,5308.0 549 | 2017-07-01,3282.0 550 | 2017-07-02,2235.0 551 | 2017-07-03,3911.0 552 | 2017-07-04,2793.0 553 | 2017-07-05,5727.0 554 | 2017-07-06,5028.0 555 | 2017-07-07,4470.0 556 | 2017-07-08,3143.0 557 | 2017-07-09,1886.0 558 | 2017-07-10,4470.0 559 | 2017-07-11,5028.0 560 | 2017-07-12,4400.0 561 | 2017-07-13,4470.0 562 | 2017-07-14,6146.0 563 | 2017-07-15,2584.0 564 | 2017-07-16,2165.0 565 | 2017-07-17,5377.0 566 | 2017-07-18,5657.0 567 | 2017-07-19,5727.0 568 | 2017-07-20,5517.0 569 | 2017-07-21,5168.0 570 | 2017-07-22,2933.0 571 | 2017-07-23,2793.0 572 | 2017-07-24,7193.0 573 | 2017-07-25,6146.0 574 | 2017-07-26,5587.0 575 | 2017-07-27,5447.0 576 | 2017-07-28,4539.0 577 | 2017-07-29,3631.0 578 | 2017-07-30,2165.0 579 | 2017-07-31,6215.0 580 | 2017-08-01,5936.0 581 | 2017-08-02,6006.0 582 | 2017-08-03,6285.0 583 | 2017-08-04,4958.0 584 | 
2017-08-05,3073.0 585 | 2017-08-06,2305.0 586 | 2017-08-07,7961.0 587 | 2017-08-08,4958.0 588 | 2017-08-09,5727.0 589 | 2017-08-10,6146.0 590 | 2017-08-11,5238.0 591 | 2017-08-12,3492.0 592 | 2017-08-13,2165.0 593 | 2017-08-14,5796.0 594 | 2017-08-15,6076.0 595 | 2017-08-16,6425.0 596 | 2017-08-17,4889.0 597 | 2017-08-18,4819.0 598 | 2017-08-19,3562.0 599 | 2017-08-20,2793.0 600 | 2017-08-21,6285.0 601 | 2017-08-22,6285.0 602 | 2017-08-23,5098.0 603 | 2017-08-24,4819.0 604 | 2017-08-25,3631.0 605 | 2017-08-26,1536.0 606 | 2017-08-27,1536.0 607 | 2017-08-28,3282.0 608 | 2017-08-29,4609.0 609 | 2017-08-30,6704.0 610 | 2017-08-31,6146.0 611 | 2017-09-01,6565.0 612 | 2017-09-02,5796.0 613 | 2017-09-03,3911.0 614 | 2017-09-04,4330.0 615 | 2017-09-05,7472.0 616 | 2017-09-06,6565.0 617 | 2017-09-07,6425.0 618 | 2017-09-08,4609.0 619 | 2017-09-09,3631.0 620 | 2017-09-10,3352.0 621 | 2017-09-11,6425.0 622 | 2017-09-12,6704.0 623 | 2017-09-13,6844.0 624 | 2017-09-14,6704.0 625 | 2017-09-15,5308.0 626 | 2017-09-16,3701.0 627 | 2017-09-17,2863.0 628 | 2017-09-18,5657.0 629 | 2017-09-19,5796.0 630 | 2017-09-20,7333.0 631 | 2017-09-21,8031.0 632 | 2017-09-22,6285.0 633 | 2017-09-23,3212.0 634 | 2017-09-24,3073.0 635 | 2017-09-25,5657.0 636 | 2017-09-26,5796.0 637 | 2017-09-27,7682.0 638 | 2017-09-28,7193.0 639 | 2017-09-29,6146.0 640 | 2017-09-30,3492.0 641 | 2017-10-01,4400.0 642 | 2017-10-02,6215.0 643 | 2017-10-03,6844.0 644 | 2017-10-04,6076.0 645 | 2017-10-05,6215.0 646 | 2017-10-06,4190.0 647 | 2017-10-07,2793.0 648 | 2017-10-08,2514.0 649 | 2017-10-09,4749.0 650 | 2017-10-10,5377.0 651 | 2017-10-11,6215.0 652 | 2017-10-12,5936.0 653 | 2017-10-13,5936.0 654 | 2017-10-14,3771.0 655 | 2017-10-15,2444.0 656 | 2017-10-16,5936.0 657 | 2017-10-17,5866.0 658 | 2017-10-18,6076.0 659 | 2017-10-19,6006.0 660 | 2017-10-20,6076.0 661 | 2017-10-21,2933.0 662 | 2017-10-22,3003.0 663 | 2017-10-23,5238.0 664 | 2017-10-24,5098.0 665 | 2017-10-25,5377.0 666 | 2017-10-26,6565.0 667 | 2017-10-27,6914.0 668 | 2017-10-28,3492.0 669 | 2017-10-29,2863.0 670 | 2017-10-30,6355.0 671 | 2017-10-31,4819.0 672 | 2017-11-01,7542.0 673 | 2017-11-02,6425.0 674 | 2017-11-03,5517.0 675 | 2017-11-04,3841.0 676 | 2017-11-05,3212.0 677 | 2017-11-06,6215.0 678 | 2017-11-07,5866.0 679 | 2017-11-08,6006.0 680 | 2017-11-09,6495.0 681 | 2017-11-10,6704.0 682 | 2017-11-11,3562.0 683 | 2017-11-12,3422.0 684 | 2017-11-13,6704.0 685 | 2017-11-14,6076.0 686 | 2017-11-15,5936.0 687 | 2017-11-16,6076.0 688 | 2017-11-17,6565.0 689 | 2017-11-18,2863.0 690 | 2017-11-19,3212.0 691 | 2017-11-20,6565.0 692 | 2017-11-21,6215.0 693 | 2017-11-22,5796.0 694 | 2017-11-23,2025.0 695 | 2017-11-24,4889.0 696 | 2017-11-25,4190.0 697 | 2017-11-26,3841.0 698 | 2017-11-27,7403.0 699 | 2017-11-28,6565.0 700 | 2017-11-29,7752.0 701 | 2017-11-30,8450.0 702 | 2017-12-01,6774.0 703 | 2017-12-02,4470.0 704 | 2017-12-03,3003.0 705 | 2017-12-04,6984.0 706 | 2017-12-05,6355.0 707 | 2017-12-06,6774.0 708 | 2017-12-07,6215.0 709 | 2017-12-08,5796.0 710 | 2017-12-09,3492.0 711 | 2017-12-10,3212.0 712 | 2017-12-11,6914.0 713 | 2017-12-12,6704.0 714 | 2017-12-13,6704.0 715 | 2017-12-14,6634.0 716 | 2017-12-15,5727.0 717 | 2017-12-16,3981.0 718 | 2017-12-17,2584.0 719 | 2017-12-18,5936.0 720 | 2017-12-19,4679.0 721 | 2017-12-20,6355.0 722 | 2017-12-21,7403.0 723 | 2017-12-22,4958.0 724 | 2017-12-23,3352.0 725 | 2017-12-24,2165.0 726 | 2017-12-25,2025.0 727 | 2017-12-26,5098.0 728 | 2017-12-27,6355.0 729 | 2017-12-28,7193.0 730 | 2017-12-29,6565.0 731 | 2017-12-30,2863.0 732 | 
2017-12-31,2584.0 733 | 2018-01-01,3143.0 734 | 2018-01-02,7193.0 735 | 2018-01-03,7822.0 736 | 2018-01-04,8939.0 737 | 2018-01-05,7123.0 738 | 2018-01-06,3771.0 739 | 2018-01-07,2933.0 740 | 2018-01-08,6844.0 741 | 2018-01-09,5587.0 742 | 2018-01-10,6984.0 743 | 2018-01-11,6146.0 744 | 2018-01-12,5796.0 745 | 2018-01-13,3841.0 746 | 2018-01-14,3841.0 747 | 2018-01-15,5936.0 748 | 2018-01-16,6285.0 749 | 2018-01-17,6774.0 750 | 2018-01-18,7891.0 751 | 2018-01-19,6006.0 752 | 2018-01-20,3492.0 753 | 2018-01-21,3841.0 754 | 2018-01-22,6774.0 755 | 2018-01-23,6425.0 756 | 2018-01-24,5796.0 757 | 2018-01-25,7682.0 758 | 2018-01-26,6006.0 759 | 2018-01-27,3562.0 760 | 2018-01-28,3073.0 761 | 2018-01-29,5866.0 762 | 2018-01-30,6495.0 763 | 2018-01-31,7612.0 764 | 2018-02-01,7961.0 765 | 2018-02-02,6355.0 766 | 2018-02-03,4051.0 767 | 2018-02-04,3003.0 768 | 2018-02-05,7053.0 769 | 2018-02-06,7891.0 770 | 2018-02-07,6914.0 771 | 2018-02-08,9218.0 772 | 2018-02-09,6984.0 773 | 2018-02-10,3562.0 774 | 2018-02-11,3911.0 775 | 2018-02-12,7612.0 776 | 2018-02-13,6495.0 777 | 2018-02-14,8031.0 778 | 2018-02-15,8590.0 779 | 2018-02-16,7542.0 780 | 2018-02-17,4819.0 781 | 2018-02-18,3422.0 782 | 2018-02-19,7542.0 783 | 2018-02-20,7891.0 784 | 2018-02-21,8101.0 785 | 2018-02-22,9498.0 786 | 2018-02-23,6774.0 787 | 2018-02-24,3562.0 788 | 2018-02-25,3771.0 789 | 2018-02-26,5936.0 790 | 2018-02-27,6285.0 791 | 2018-02-28,7053.0 792 | 2018-03-01,7542.0 793 | 2018-03-02,6215.0 794 | 2018-03-03,3701.0 795 | 2018-03-04,3771.0 796 | 2018-03-05,6215.0 797 | 2018-03-06,6425.0 798 | 2018-03-07,6355.0 799 | 2018-03-08,7682.0 800 | 2018-03-09,6634.0 801 | 2018-03-10,3981.0 802 | 2018-03-11,3562.0 803 | 2018-03-12,5587.0 804 | 2018-03-13,5517.0 805 | 2018-03-14,5377.0 806 | 2018-03-15,6146.0 807 | 2018-03-16,5936.0 808 | 2018-03-17,3911.0 809 | 2018-03-18,3701.0 810 | 2018-03-19,6774.0 811 | 2018-03-20,6774.0 812 | 2018-03-21,6285.0 813 | 2018-03-22,6285.0 814 | 2018-03-23,7123.0 815 | 2018-03-24,3073.0 816 | 2018-03-25,3003.0 817 | 2018-03-26,6006.0 818 | 2018-03-27,6076.0 819 | 2018-03-28,6634.0 820 | 2018-03-29,6425.0 821 | 2018-03-30,5377.0 822 | 2018-03-31,3701.0 823 | 2018-04-01,2654.0 824 | 2018-04-02,7472.0 825 | 2018-04-03,6774.0 826 | 2018-04-04,5796.0 827 | 2018-04-05,6565.0 828 | 2018-04-06,6355.0 829 | 2018-04-07,3492.0 830 | 2018-04-08,2444.0 831 | 2018-04-09,6914.0 832 | 2018-04-10,5657.0 833 | 2018-04-11,6495.0 834 | 2018-04-12,6146.0 835 | 2018-04-13,6914.0 836 | 2018-04-14,3841.0 837 | 2018-04-15,3282.0 838 | 2018-04-16,6495.0 839 | 2018-04-17,5796.0 840 | 2018-04-18,6495.0 841 | 2018-04-19,7053.0 842 | 2018-04-20,7123.0 843 | 2018-04-21,3492.0 844 | 2018-04-22,2863.0 845 | 2018-04-23,6844.0 846 | 2018-04-24,5866.0 847 | 2018-04-25,5168.0 848 | 2018-04-26,6215.0 849 | 2018-04-27,7263.0 850 | 2018-04-28,4051.0 851 | 2018-04-29,3282.0 852 | 2018-04-30,7822.0 853 | 2018-05-01,7333.0 854 | 2018-05-02,6285.0 855 | 2018-05-03,7263.0 856 | 2018-05-04,6565.0 857 | 2018-05-05,4190.0 858 | 2018-05-06,3352.0 859 | 2018-05-07,7053.0 860 | 2018-05-08,5727.0 861 | 2018-05-09,6565.0 862 | 2018-05-10,5866.0 863 | 2018-05-11,6425.0 864 | 2018-05-12,3422.0 865 | 2018-05-13,2514.0 866 | 2018-05-14,7053.0 867 | 2018-05-15,5377.0 868 | 2018-05-16,5098.0 869 | 2018-05-17,7123.0 870 | 2018-05-18,5098.0 871 | 2018-05-19,3143.0 872 | 2018-05-20,3282.0 873 | 2018-05-21,6355.0 874 | 2018-05-22,6285.0 875 | 2018-05-23,5866.0 876 | 2018-05-24,7542.0 877 | 2018-05-25,5517.0 878 | 2018-05-26,2933.0 879 | 2018-05-27,3143.0 880 | 
2018-05-28,3492.0 881 | 2018-05-29,7822.0 882 | 2018-05-30,9777.0 883 | 2018-05-31,7891.0 884 | 2018-06-01,7263.0 885 | 2018-06-02,4679.0 886 | 2018-06-03,3143.0 887 | 2018-06-04,6984.0 888 | 2018-06-05,6634.0 889 | 2018-06-06,6984.0 890 | 2018-06-07,7333.0 891 | 2018-06-08,5727.0 892 | 2018-06-09,3212.0 893 | 2018-06-10,2933.0 894 | 2018-06-11,6565.0 895 | 2018-06-12,6006.0 896 | 2018-06-13,6565.0 897 | 2018-06-14,6355.0 898 | 2018-06-15,6634.0 899 | 2018-06-16,3841.0 900 | 2018-06-17,3003.0 901 | 2018-06-18,6425.0 902 | 2018-06-19,5796.0 903 | 2018-06-20,6774.0 904 | 2018-06-21,6355.0 905 | 2018-06-22,5587.0 906 | 2018-06-23,3073.0 907 | 2018-06-24,3143.0 908 | 2018-06-25,6844.0 909 | 2018-06-26,5308.0 910 | 2018-06-27,6704.0 911 | 2018-06-28,8311.0 912 | 2018-06-29,7263.0 913 | 2018-06-30,5168.0 914 | 2018-07-01,3911.0 915 | 2018-07-02,7822.0 916 | 2018-07-03,8450.0 917 | 2018-07-04,5587.0 918 | 2018-07-05,8101.0 919 | 2018-07-06,8450.0 920 | 2018-07-07,4679.0 921 | 2018-07-08,2863.0 922 | 2018-07-09,6844.0 923 | 2018-07-10,6285.0 924 | 2018-07-11,6355.0 925 | 2018-07-12,7123.0 926 | 2018-07-13,6704.0 927 | 2018-07-14,3981.0 928 | 2018-07-15,3352.0 929 | 2018-07-16,7263.0 930 | 2018-07-17,6006.0 931 | 2018-07-18,6914.0 932 | 2018-07-19,8101.0 933 | 2018-07-20,7193.0 934 | 2018-07-21,4120.0 935 | 2018-07-22,2724.0 936 | 2018-07-23,6006.0 937 | 2018-07-24,6355.0 938 | 2018-07-25,7263.0 939 | 2018-07-26,7612.0 940 | 2018-07-27,7822.0 941 | 2018-07-28,4749.0 942 | 2018-07-29,3143.0 943 | 2018-07-30,7263.0 944 | 2018-07-31,7123.0 945 | 2018-08-01,7053.0 946 | 2018-08-02,6355.0 947 | 2018-08-03,6076.0 948 | 2018-08-04,2863.0 949 | 2018-08-05,3143.0 950 | 2018-08-06,7263.0 951 | 2018-08-07,6495.0 952 | 2018-08-08,6355.0 953 | 2018-08-09,6844.0 954 | 2018-08-10,6146.0 955 | 2018-08-11,3771.0 956 | 2018-08-12,3073.0 957 | 2018-08-13,6495.0 958 | 2018-08-14,5587.0 959 | 2018-08-15,6565.0 960 | 2018-08-16,6565.0 961 | 2018-08-17,5447.0 962 | 2018-08-18,3771.0 963 | 2018-08-19,2863.0 964 | 2018-08-20,7123.0 965 | 2018-08-21,6774.0 966 | 2018-08-22,5727.0 967 | 2018-08-23,6774.0 968 | 2018-08-24,7123.0 969 | 2018-08-25,4819.0 970 | 2018-08-26,3143.0 971 | 2018-08-27,7542.0 972 | 2018-08-28,5517.0 973 | 2018-08-29,6495.0 974 | 2018-08-30,7682.0 975 | 2018-08-31,6215.0 976 | 2018-09-01,4330.0 977 | 2018-09-02,2724.0 978 | 2018-09-03,3701.0 979 | 2018-09-04,7123.0 980 | 2018-09-05,7612.0 981 | 2018-09-06,7123.0 982 | 2018-09-07,6425.0 983 | 2018-09-08,3562.0 984 | 2018-09-09,3492.0 985 | 2018-09-10,6704.0 986 | 2018-09-11,6076.0 987 | 2018-09-12,6146.0 988 | 2018-09-13,7193.0 989 | 2018-09-14,6285.0 990 | 2018-09-15,3771.0 991 | 2018-09-16,3981.0 992 | 2018-09-17,6355.0 993 | 2018-09-18,6076.0 994 | 2018-09-19,7123.0 995 | 2018-09-20,6425.0 996 | 2018-09-21,5727.0 997 | 2018-09-22,3492.0 998 | 2018-09-23,2933.0 999 | 2018-09-24,6565.0 1000 | 2018-09-25,6844.0 1001 | 2018-09-26,5377.0 1002 | 2018-09-27,5238.0 1003 | 2018-09-28,5028.0 1004 | 2018-09-29,4190.0 1005 | 2018-09-30,3003.0 1006 | 2018-10-01,7403.0 1007 | 2018-10-02,6215.0 1008 | 2018-10-03,6565.0 1009 | 2018-10-04,6634.0 1010 | 2018-10-05,5587.0 1011 | 2018-10-06,3981.0 1012 | 2018-10-07,2933.0 1013 | 2018-10-08,5517.0 1014 | 2018-10-09,5727.0 1015 | 2018-10-10,7263.0 1016 | 2018-10-11,7403.0 1017 | 2018-10-12,5866.0 1018 | 2018-10-13,4190.0 1019 | 2018-10-14,2933.0 1020 | 2018-10-15,6215.0 1021 | 2018-10-16,5587.0 1022 | 2018-10-17,6146.0 1023 | 2018-10-18,5587.0 1024 | 2018-10-19,5377.0 1025 | 2018-10-20,3352.0 1026 | 2018-10-21,3631.0 1027 
| 2018-10-22,5238.0 1028 | 2018-10-23,5796.0 1029 | 2018-10-24,6634.0 1030 | 2018-10-25,6634.0 1031 | 2018-10-26,6774.0 1032 | 2018-10-27,4190.0 1033 | 2018-10-28,3352.0 1034 | 2018-10-29,5866.0 1035 | 2018-10-30,7053.0 1036 | 2018-10-31,6006.0 1037 | 2018-11-01,7542.0 1038 | 2018-11-02,7053.0 1039 | 2018-11-03,5308.0 1040 | 2018-11-04,4330.0 1041 | 2018-11-05,8380.0 1042 | 2018-11-06,6774.0 1043 | 2018-11-07,7542.0 1044 | 2018-11-08,8869.0 1045 | 2018-11-09,5727.0 1046 | 2018-11-10,4958.0 1047 | 2018-11-11,3492.0 1048 | 2018-11-12,7612.0 1049 | 2018-11-13,9568.0 1050 | 2018-11-14,9079.0 1051 | 2018-11-15,16691.0 1052 | 2018-11-16,9079.0 1053 | 2018-11-17,5866.0 1054 | 2018-11-18,4749.0 1055 | 2018-11-19,8869.0 1056 | 2018-11-20,7682.0 1057 | 2018-11-21,8799.0 1058 | 2018-11-22,3771.0 1059 | 2018-11-23,6774.0 1060 | 2018-11-24,4749.0 1061 | 2018-11-25,4889.0 1062 | 2018-11-26,10056.0 1063 | 2018-11-27,9218.0 1064 | 2018-11-28,7822.0 1065 | 2018-11-29,7891.0 1066 | 2018-11-30,7612.0 1067 | 2018-12-01,5238.0 1068 | 2018-12-02,4819.0 1069 | 2018-12-03,7612.0 1070 | 2018-12-04,7263.0 1071 | 2018-12-05,6704.0 1072 | 2018-12-06,7403.0 1073 | 2018-12-07,6285.0 1074 | 2018-12-08,3911.0 1075 | 2018-12-09,3911.0 1076 | 2018-12-10,8101.0 1077 | 2018-12-11,8241.0 1078 | 2018-12-12,7961.0 1079 | 2018-12-13,8660.0 1080 | 2018-12-14,8380.0 1081 | 2018-12-15,5377.0 1082 | 2018-12-16,4470.0 1083 | 2018-12-17,7822.0 1084 | 2018-12-18,10056.0 1085 | 2018-12-19,9149.0 1086 | 2018-12-20,8869.0 1087 | 2018-12-21,6704.0 1088 | 2018-12-22,4819.0 1089 | 2018-12-23,3073.0 1090 | 2018-12-24,4190.0 1091 | 2018-12-25,2933.0 1092 | 2018-12-26,7542.0 1093 | 2018-12-27,7053.0 1094 | 2018-12-28,6634.0 1095 | 2018-12-29,3631.0 1096 | 2018-12-30,3701.0 1097 | 2018-12-31,5796.0 1098 | 2019-01-01,4539.0 1099 | 2019-01-02,8590.0 1100 | 2019-01-03,8101.0 1101 | 2019-01-04,7682.0 1102 | 2019-01-05,4609.0 1103 | 2019-01-06,3562.0 1104 | 2019-01-07,8590.0 1105 | 2019-01-08,6774.0 1106 | 2019-01-09,9009.0 1107 | 2019-01-10,8380.0 1108 | 2019-01-11,6984.0 1109 | 2019-01-12,4539.0 1110 | 2019-01-13,4260.0 1111 | 2019-01-14,7612.0 1112 | 2019-01-15,7752.0 1113 | 2019-01-16,7752.0 1114 | 2019-01-17,8380.0 1115 | 2019-01-18,7682.0 1116 | 2019-01-19,3701.0 1117 | 2019-01-20,4400.0 1118 | 2019-01-21,6355.0 1119 | 2019-01-22,6774.0 1120 | 2019-01-23,7891.0 1121 | 2019-01-24,6355.0 1122 | 2019-01-25,6984.0 1123 | 2019-01-26,4609.0 1124 | 2019-01-27,3352.0 1125 | 2019-01-28,6355.0 1126 | 2019-01-29,5866.0 1127 | 2019-01-30,7822.0 1128 | 2019-01-31,8939.0 1129 | 2019-02-01,7682.0 1130 | 2019-02-02,5796.0 1131 | 2019-02-03,3841.0 1132 | 2019-02-04,6704.0 1133 | 2019-02-05,8590.0 1134 | 2019-02-06,7822.0 1135 | 2019-02-07,8799.0 1136 | 2019-02-08,7263.0 1137 | 2019-02-09,4819.0 1138 | 2019-02-10,2793.0 1139 | 2019-02-11,7542.0 1140 | 2019-02-12,7961.0 1141 | 2019-02-13,8380.0 1142 | 2019-02-14,8101.0 1143 | 2019-02-15,7123.0 1144 | 2019-02-16,5308.0 1145 | 2019-02-17,3841.0 1146 | 2019-02-18,6285.0 1147 | 2019-02-19,6984.0 1148 | 2019-02-20,8031.0 1149 | 2019-02-21,7333.0 1150 | 2019-02-22,7822.0 1151 | 2019-02-23,4819.0 1152 | 2019-02-24,4470.0 1153 | 2019-02-25,8101.0 1154 | 2019-02-26,8101.0 1155 | 2019-02-27,9707.0 1156 | 2019-02-28,10196.0 1157 | 2019-03-01,7333.0 1158 | 2019-03-02,6006.0 1159 | 2019-03-03,4260.0 1160 | 2019-03-04,8660.0 1161 | 2019-03-05,7403.0 1162 | 2019-03-06,7891.0 1163 | 2019-03-07,7333.0 1164 | 2019-03-08,7403.0 1165 | 2019-03-09,4470.0 1166 | 2019-03-10,4051.0 1167 | 2019-03-11,7123.0 1168 | 2019-03-12,7263.0 1169 
| 2019-03-13,7682.0 1170 | 2019-03-14,7612.0 1171 | 2019-03-15,6984.0 1172 | 2019-03-16,4609.0 1173 | 2019-03-17,3282.0 1174 | 2019-03-18,6285.0 1175 | 2019-03-19,6495.0 1176 | 2019-03-20,7123.0 1177 | 2019-03-21,7752.0 1178 | 2019-03-22,6704.0 1179 | 2019-03-23,3841.0 1180 | 2019-03-24,4330.0 1181 | 2019-03-25,7333.0 1182 | 2019-03-26,5796.0 1183 | 2019-03-27,7403.0 1184 | 2019-03-28,8311.0 1185 | 2019-03-29,6425.0 1186 | 2019-03-30,4400.0 1187 | 2019-03-31,3771.0 1188 | 2019-04-01,7263.0 1189 | 2019-04-02,7961.0 1190 | 2019-04-03,8031.0 1191 | 2019-04-04,6634.0 1192 | 2019-04-05,7053.0 1193 | 2019-04-06,4051.0 1194 | 2019-04-07,4330.0 1195 | 2019-04-08,7123.0 1196 | 2019-04-09,6774.0 1197 | 2019-04-10,6984.0 1198 | 2019-04-11,6634.0 1199 | 2019-04-12,7472.0 1200 | 2019-04-13,4749.0 1201 | 2019-04-14,4330.0 1202 | 2019-04-15,7123.0 1203 | 2019-04-16,6146.0 1204 | 2019-04-17,8101.0 1205 | 2019-04-18,7123.0 1206 | 2019-04-19,5168.0 1207 | 2019-04-20,3562.0 1208 | 2019-04-21,70.0 1209 | 2019-04-22,3771.0 1210 | 2019-04-23,5238.0 1211 | 2019-04-24,6146.0 1212 | 2019-04-25,6704.0 1213 | 2019-04-26,6634.0 1214 | 2019-04-27,4190.0 1215 | 2019-04-28,3562.0 1216 | 2019-04-29,6984.0 1217 | 2019-04-30,7053.0 1218 | 2019-05-01,7891.0 1219 | 2019-05-02,8869.0 1220 | 2019-05-03,6634.0 1221 | 2019-05-04,4749.0 1222 | 2019-05-05,4470.0 1223 | 2019-05-06,8660.0 1224 | 2019-05-07,7403.0 1225 | 2019-05-08,7333.0 1226 | 2019-05-09,8311.0 1227 | 2019-05-10,6285.0 1228 | 2019-05-11,4819.0 1229 | 2019-05-12,3212.0 1230 | 2019-05-13,5517.0 1231 | 2019-05-14,6565.0 1232 | 2019-05-15,7822.0 1233 | 2019-05-16,6495.0 1234 | 2019-05-17,5936.0 1235 | 2019-05-18,3562.0 1236 | 2019-05-19,2793.0 1237 | 2019-05-20,6285.0 1238 | 2019-05-21,5587.0 1239 | 2019-05-22,6355.0 1240 | 2019-05-23,6076.0 1241 | 2019-05-24,5866.0 1242 | 2019-05-25,3073.0 1243 | 2019-05-26,3282.0 1244 | 2019-05-27,4120.0 1245 | 2019-05-28,6914.0 1246 | 2019-05-29,8171.0 1247 | 2019-05-30,8031.0 1248 | 2019-05-31,6774.0 1249 | 2019-06-01,4260.0 1250 | 2019-06-02,4051.0 1251 | 2019-06-03,8101.0 1252 | 2019-06-04,6355.0 1253 | 2019-06-05,7193.0 1254 | 2019-06-06,7472.0 1255 | 2019-06-07,6076.0 1256 | 2019-06-08,3073.0 1257 | 2019-06-09,3631.0 1258 | 2019-06-10,6006.0 1259 | 2019-06-11,7403.0 1260 | 2019-06-12,8171.0 1261 | 2019-06-13,6215.0 1262 | 2019-06-14,7123.0 1263 | 2019-06-15,4819.0 1264 | 2019-06-16,2724.0 1265 | 2019-06-17,7123.0 1266 | 2019-06-18,7193.0 1267 | 2019-06-19,5796.0 1268 | 2019-06-20,7403.0 1269 | 2019-06-21,7542.0 1270 | 2019-06-22,3631.0 1271 | 2019-06-23,3143.0 1272 | 2019-06-24,6076.0 1273 | 2019-06-25,5377.0 1274 | 2019-06-26,6355.0 1275 | 2019-06-27,7123.0 1276 | 2019-06-28,6006.0 1277 | 2019-06-29,4260.0 1278 | 2019-06-30,2724.0 1279 | 2019-07-01,7752.0 1280 | 2019-07-02,6774.0 1281 | 2019-07-03,6914.0 1282 | 2019-07-04,3911.0 1283 | 2019-07-05,5098.0 1284 | 2019-07-06,3212.0 1285 | 2019-07-07,3073.0 1286 | 2019-07-08,6006.0 1287 | 2019-07-09,6006.0 1288 | 2019-07-10,6355.0 1289 | 2019-07-11,7403.0 1290 | 2019-07-12,7472.0 1291 | 2019-07-13,3841.0 1292 | 2019-07-14,2933.0 1293 | 2019-07-15,7053.0 1294 | 2019-07-16,6285.0 1295 | 2019-07-17,5168.0 1296 | 2019-07-18,5098.0 1297 | 2019-07-19,4400.0 1298 | 2019-07-20,3143.0 1299 | 2019-07-21,3143.0 1300 | 2019-07-22,5936.0 1301 | 2019-07-23,4400.0 1302 | 2019-07-24,5098.0 1303 | 2019-07-25,6704.0 1304 | 2019-07-26,6006.0 1305 | 2019-07-27,3492.0 1306 | 2019-07-28,3003.0 1307 | 2019-07-29,5308.0 1308 | 2019-07-30,5866.0 1309 | 2019-07-31,5796.0 1310 | 2019-08-01,6006.0 1311 | 
2019-08-02,5936.0 1312 | 2019-08-03,3282.0 1313 | 2019-08-04,3701.0 1314 | 2019-08-05,6006.0 1315 | 2019-08-06,5168.0 1316 | 2019-08-07,6006.0 1317 | 2019-08-08,6215.0 1318 | 2019-08-09,5308.0 1319 | 2019-08-10,4190.0 1320 | 2019-08-11,2514.0 1321 | 2019-08-12,6146.0 1322 | 2019-08-13,6984.0 1323 | 2019-08-14,5727.0 1324 | 2019-08-15,8101.0 1325 | 2019-08-16,4889.0 1326 | 2019-08-17,3003.0 1327 | 2019-08-18,3143.0 1328 | 2019-08-19,4889.0 1329 | 2019-08-20,5238.0 1330 | 2019-08-21,5587.0 1331 | 2019-08-22,6774.0 1332 | 2019-08-23,5796.0 1333 | 2019-08-24,3631.0 1334 | 2019-08-25,2514.0 1335 | 2019-08-26,6565.0 1336 | 2019-08-27,5517.0 1337 | 2019-08-28,6076.0 1338 | 2019-08-29,6215.0 1339 | 2019-08-30,4958.0 1340 | 2019-08-31,3003.0 1341 | 2019-09-01,2305.0 1342 | 2019-09-02,4749.0 1343 | 2019-09-03,7612.0 1344 | 2019-09-04,6984.0 1345 | 2019-09-05,6495.0 1346 | 2019-09-06,7263.0 1347 | 2019-09-07,3841.0 1348 | 2019-09-08,2933.0 1349 | 2019-09-09,6285.0 1350 | 2019-09-10,4819.0 1351 | 2019-09-11,4400.0 1352 | 2019-09-12,6285.0 1353 | 2019-09-13,5866.0 1354 | 2019-09-14,3422.0 1355 | 2019-09-15,2863.0 1356 | 2019-09-16,6565.0 1357 | 2019-09-17,5308.0 1358 | 2019-09-18,5447.0 1359 | 2019-09-19,5936.0 1360 | 2019-09-20,5866.0 1361 | 2019-09-21,3492.0 1362 | 2019-09-22,2863.0 1363 | 2019-09-23,5727.0 1364 | 2019-09-24,5727.0 1365 | 2019-09-25,5936.0 1366 | 2019-09-26,6146.0 1367 | 2019-09-27,5796.0 1368 | 2019-09-28,4190.0 1369 | 2019-09-29,2793.0 1370 | 2019-09-30,5936.0 1371 | 2019-10-01,6495.0 1372 | 2019-10-02,5447.0 1373 | 2019-10-03,6565.0 1374 | 2019-10-04,4819.0 1375 | 2019-10-05,3282.0 1376 | 2019-10-06,3631.0 1377 | 2019-10-07,6215.0 1378 | 2019-10-08,5727.0 1379 | 2019-10-09,5936.0 1380 | 2019-10-10,6495.0 1381 | 2019-10-11,6006.0 1382 | 2019-10-12,3143.0 1383 | 2019-10-13,2654.0 1384 | 2019-10-14,5308.0 1385 | 2019-10-15,5308.0 1386 | 2019-10-16,5098.0 1387 | 2019-10-17,6844.0 1388 | 2019-10-18,5866.0 1389 | 2019-10-19,3422.0 1390 | 2019-10-20,2444.0 1391 | 2019-10-21,6565.0 1392 | 2019-10-22,4679.0 1393 | 2019-10-23,6425.0 1394 | 2019-10-24,6704.0 1395 | 2019-10-25,4889.0 1396 | 2019-10-26,3771.0 1397 | 2019-10-27,2863.0 1398 | 2019-10-28,5796.0 1399 | 2019-10-29,5168.0 1400 | 2019-10-30,4679.0 1401 | 2019-10-31,5028.0 1402 | 2019-11-01,5866.0 1403 | 2019-11-02,3562.0 1404 | 2019-11-03,2863.0 1405 | 2019-11-04,5657.0 1406 | 2019-11-05,5796.0 1407 | 2019-11-06,6425.0 1408 | 2019-11-07,5936.0 1409 | 2019-11-08,6774.0 1410 | 2019-11-09,3492.0 1411 | 2019-11-10,2793.0 1412 | 2019-11-11,5308.0 1413 | 2019-11-12,6565.0 1414 | 2019-11-13,9917.0 1415 | 2019-11-14,6914.0 1416 | 2019-11-15,5377.0 1417 | 2019-11-16,3981.0 1418 | 2019-11-17,3212.0 1419 | 2019-11-18,6355.0 1420 | 2019-11-19,5308.0 1421 | 2019-11-20,5866.0 1422 | 2019-11-21,6146.0 1423 | 2019-11-22,5796.0 1424 | 2019-11-23,3911.0 1425 | 2019-11-24,3422.0 1426 | 2019-11-25,7472.0 1427 | 2019-11-26,6425.0 1428 | 2019-11-27,6495.0 1429 | 2019-11-28,3212.0 1430 | 2019-11-29,5098.0 1431 | 2019-11-30,5308.0 1432 | 2019-12-01,3701.0 1433 | 2019-12-02,8520.0 1434 | 2019-12-03,6634.0 1435 | 2019-12-04,7053.0 1436 | 2019-12-05,6146.0 1437 | 2019-12-06,5866.0 1438 | 2019-12-07,4679.0 1439 | 2019-12-08,3422.0 1440 | 2019-12-09,5936.0 1441 | 2019-12-10,5028.0 1442 | 2019-12-11,6076.0 1443 | 2019-12-12,6285.0 1444 | 2019-12-13,4679.0 1445 | 2019-12-14,4470.0 1446 | 2019-12-15,3492.0 1447 | 2019-12-16,7263.0 1448 | 2019-12-17,6704.0 1449 | 2019-12-18,6704.0 1450 | 2019-12-19,8311.0 1451 | 2019-12-20,7822.0 1452 | 2019-12-21,5028.0 1453 | 
2019-12-22,3282.0 1454 | 2019-12-23,6984.0 1455 | 2019-12-24,4330.0 1456 | 2019-12-25,3282.0 1457 | 2019-12-26,6634.0 1458 | 2019-12-27,6285.0 1459 | 2019-12-28,4889.0 1460 | 2019-12-29,3562.0 1461 | 2019-12-30,6704.0 1462 | 2019-12-31,5377.0 1463 | 2020-01-01,4400.0 1464 | 2020-01-02,8520.0 1465 | 2020-01-03,6215.0 1466 | 2020-01-04,4330.0 1467 | 2020-01-05,2724.0 1468 | 2020-01-06,6844.0 1469 | 2020-01-07,6774.0 1470 | 2020-01-08,6844.0 1471 | 2020-01-09,7403.0 1472 | 2020-01-10,6634.0 1473 | 2020-01-11,4051.0 1474 | 2020-01-12,3701.0 1475 | 2020-01-13,6565.0 1476 | 2020-01-14,6006.0 1477 | 2020-01-15,7822.0 1478 | 2020-01-16,8241.0 1479 | 2020-01-17,6285.0 1480 | 2020-01-18,3911.0 1481 | 2020-01-19,3562.0 1482 | 2020-01-20,6285.0 1483 | 2020-01-21,7472.0 1484 | 2020-01-22,6146.0 1485 | 2020-01-23,6844.0 1486 | 2020-01-24,5587.0 1487 | 2020-01-25,3492.0 1488 | 2020-01-26,3701.0 1489 | 2020-01-27,8171.0 1490 | 2020-01-28,7193.0 1491 | 2020-01-29,8031.0 1492 | 2020-01-30,6984.0 1493 | 2020-01-31,6425.0 1494 | 2020-02-01,4470.0 1495 | 2020-02-02,3352.0 1496 | 2020-02-03,8380.0 1497 | 2020-02-04,8171.0 1498 | 2020-02-05,8031.0 1499 | 2020-02-06,6984.0 1500 | 2020-02-07,6984.0 1501 | 2020-02-08,4819.0 1502 | 2020-02-09,4120.0 1503 | 2020-02-10,8101.0 1504 | 2020-02-11,5238.0 1505 | 2020-02-12,7193.0 1506 | 2020-02-13,7263.0 1507 | 2020-02-14,7333.0 1508 | 2020-02-15,4400.0 1509 | 2020-02-16,4470.0 1510 | 2020-02-17,6146.0 1511 | 2020-02-18,7053.0 1512 | 2020-02-19,8660.0 1513 | 2020-02-20,4400.0 1514 | 2020-02-21,7752.0 1515 | 2020-02-22,4120.0 1516 | 2020-02-23,4609.0 1517 | 2020-02-24,7542.0 1518 | 2020-02-25,7682.0 1519 | 2020-02-26,7123.0 1520 | 2020-02-27,7403.0 1521 | 2020-02-28,6914.0 1522 | 2020-02-29,4470.0 1523 | 2020-03-01,4749.0 1524 | 2020-03-02,8241.0 1525 | 2020-03-03,7263.0 1526 | 2020-03-04,8031.0 1527 | 2020-03-05,8590.0 1528 | 2020-03-06,7612.0 1529 | 2020-03-07,5447.0 1530 | 2020-03-08,3073.0 1531 | 2020-03-09,8241.0 1532 | 2020-03-10,5796.0 1533 | 2020-03-11,6984.0 1534 | 2020-03-12,8101.0 1535 | 2020-03-13,5657.0 1536 | 2020-03-14,3562.0 1537 | 2020-03-15,3911.0 1538 | 2020-03-16,8380.0 1539 | 2020-03-17,6565.0 1540 | 2020-03-18,7891.0 1541 | 2020-03-19,8311.0 1542 | 2020-03-20,7403.0 1543 | 2020-03-21,5587.0 1544 | 2020-03-22,4400.0 1545 | 2020-03-23,8380.0 1546 | 2020-03-24,7822.0 1547 | 2020-03-25,7961.0 1548 | 2020-03-26,9917.0 1549 | 2020-03-27,8380.0 1550 | 2020-03-28,6495.0 1551 | 2020-03-29,4260.0 1552 | 2020-03-30,10196.0 1553 | 2020-03-31,10266.0 1554 | 2020-04-01,9707.0 1555 | 2020-04-02,10056.0 1556 | 2020-04-03,9707.0 1557 | 2020-04-04,6215.0 1558 | 2020-04-05,4400.0 1559 | 2020-04-06,8869.0 1560 | 2020-04-07,8171.0 1561 | 2020-04-08,7682.0 1562 | 2020-04-09,9358.0 1563 | 2020-04-10,7822.0 1564 | 2020-04-11,5098.0 1565 | 2020-04-12,3841.0 1566 | 2020-04-13,33172.0 1567 | 2020-04-14,16202.0 1568 | 2020-04-15,18995.0 1569 | 2020-04-16,12571.0 1570 | 2020-04-17,10755.0 1571 | 2020-04-18,6355.0 1572 | 2020-04-19,5098.0 1573 | 2020-04-20,10336.0 1574 | 2020-04-21,9009.0 1575 | 2020-04-22,9149.0 1576 | 2020-04-23,8869.0 1577 | 2020-04-24,8869.0 1578 | 2020-04-25,5238.0 1579 | 2020-04-26,3492.0 1580 | 2020-04-27,9637.0 1581 | 2020-04-28,8799.0 1582 | 2020-04-29,7542.0 1583 | 2020-04-30,9358.0 1584 | 2020-05-01,8939.0 1585 | 2020-05-02,6425.0 1586 | 2020-05-03,3631.0 1587 | 2020-05-04,9218.0 1588 | 2020-05-05,8171.0 1589 | 2020-05-06,7961.0 1590 | 2020-05-07,7333.0 1591 | 2020-05-08,8660.0 1592 | 2020-05-09,4889.0 1593 | 2020-05-10,2863.0 1594 | 2020-05-11,9218.0 
1595 | 2020-05-12,8799.0 1596 | 2020-05-13,9358.0 1597 | 2020-05-14,9917.0 1598 | 2020-05-15,8380.0 1599 | 2020-05-16,5517.0 1600 | 2020-05-17,4609.0 1601 | 2020-05-18,8031.0 1602 | 2020-05-19,8241.0 1603 | 2020-05-20,7123.0 1604 | 2020-05-21,7472.0 1605 | 2020-05-22,7053.0 1606 | 2020-05-23,4819.0 1607 | 2020-05-24,3981.0 1608 | 2020-05-25,5098.0 1609 | 2020-05-26,9987.0 1610 | 2020-05-27,7752.0 1611 | 2020-05-28,8241.0 1612 | 2020-05-29,9358.0 1613 | 2020-05-30,5587.0 1614 | 2020-05-31,4051.0 1615 | 2020-06-01,9428.0 1616 | 2020-06-02,8241.0 1617 | 2020-06-03,8171.0 1618 | 2020-06-04,7612.0 1619 | 2020-06-05,7822.0 1620 | 2020-06-06,3911.0 1621 | 2020-06-07,3631.0 1622 | 2020-06-08,8730.0 1623 | 2020-06-09,7682.0 1624 | 2020-06-10,6634.0 1625 | 2020-06-11,8031.0 1626 | 2020-06-12,7752.0 1627 | 2020-06-13,4958.0 1628 | 2020-06-14,3492.0 1629 | 2020-06-15,7612.0 1630 | 2020-06-16,6215.0 1631 | 2020-06-17,6355.0 1632 | 2020-06-18,6704.0 1633 | 2020-06-19,7053.0 1634 | 2020-06-20,4539.0 1635 | 2020-06-21,4260.0 1636 | 2020-06-22,7053.0 1637 | 2020-06-23,8520.0 1638 | 2020-06-24,7961.0 1639 | 2020-06-25,6774.0 1640 | 2020-06-26,6006.0 1641 | 2020-06-27,4400.0 1642 | 2020-06-28,4051.0 1643 | 2020-06-29,6704.0 1644 | 2020-06-30,8241.0 1645 | 2020-07-01,6285.0 1646 | 2020-07-02,7472.0 1647 | 2020-07-03,4819.0 1648 | 2020-07-04,3701.0 1649 | 2020-07-05,4190.0 1650 | 2020-07-06,7682.0 1651 | 2020-07-07,8311.0 1652 | 2020-07-08,8450.0 1653 | 2020-07-09,9079.0 1654 | 2020-07-10,8031.0 1655 | 2020-07-11,4958.0 1656 | 2020-07-12,4609.0 1657 | 2020-07-13,7891.0 1658 | 2020-07-14,7961.0 1659 | 2020-07-15,7961.0 1660 | 2020-07-16,7891.0 1661 | 2020-07-17,7263.0 1662 | 2020-07-18,4819.0 1663 | 2020-07-19,3701.0 1664 | 2020-07-20,6704.0 1665 | 2020-07-21,7682.0 1666 | 2020-07-22,7403.0 1667 | 2020-07-23,6425.0 1668 | 2020-07-24,6984.0 1669 | 2020-07-25,5028.0 1670 | 2020-07-26,4120.0 1671 | 2020-07-27,7752.0 1672 | 2020-07-28,8869.0 1673 | 2020-07-29,8380.0 1674 | 2020-07-30,8171.0 1675 | 2020-07-31,7403.0 1676 | 2020-08-01,5308.0 1677 | 2020-08-02,4470.0 1678 | 2020-08-03,8520.0 1679 | 2020-08-04,8101.0 1680 | 2020-08-05,7612.0 1681 | 2020-08-06,7752.0 1682 | 2020-08-07,6565.0 1683 | 2020-08-08,4260.0 1684 | 2020-08-09,3352.0 1685 | 2020-08-10,7403.0 1686 | 2020-08-11,7542.0 1687 | 2020-08-12,8520.0 1688 | 2020-08-13,8171.0 1689 | 2020-08-14,6495.0 1690 | 2020-08-15,4470.0 1691 | 2020-08-16,3631.0 1692 | 2020-08-17,8241.0 1693 | 2020-08-18,6355.0 1694 | 2020-08-19,6355.0 1695 | 2020-08-20,6285.0 1696 | 2020-08-21,6774.0 1697 | 2020-08-22,4260.0 1698 | 2020-08-23,3841.0 1699 | 2020-08-24,7472.0 1700 | 2020-08-25,7542.0 1701 | 2020-08-26,6565.0 1702 | 2020-08-27,7961.0 1703 | 2020-08-28,6565.0 1704 | 2020-08-29,3771.0 1705 | 2020-08-30,4400.0 1706 | 2020-08-31,8311.0 1707 | 2020-09-01,9428.0 1708 | 2020-09-02,7053.0 1709 | 2020-09-03,7333.0 1710 | 2020-09-04,5936.0 1711 | 2020-09-05,4190.0 1712 | 2020-09-06,3212.0 1713 | 2020-09-07,3631.0 1714 | 2020-09-08,7472.0 1715 | 2020-09-09,6215.0 1716 | 2020-09-10,6355.0 1717 | 2020-09-11,6425.0 1718 | 2020-09-12,3422.0 1719 | 2020-09-13,2933.0 1720 | 2020-09-14,6355.0 1721 | 2020-09-15,6215.0 1722 | 2020-09-16,6425.0 1723 | 2020-09-17,7333.0 1724 | 2020-09-18,5377.0 1725 | 2020-09-19,3911.0 1726 | 2020-09-20,2584.0 1727 | 2020-09-21,5727.0 1728 | 2020-09-22,4330.0 1729 | 2020-09-23,5308.0 1730 | 2020-09-24,6146.0 1731 | 2020-09-25,4051.0 1732 | 2020-09-26,3212.0 1733 | 2020-09-27,3771.0 1734 | 2020-09-28,7333.0 1735 | 2020-09-29,7263.0 1736 | 2020-09-30,6984.0 1737 
| 2020-10-01,5587.0 1738 | 2020-10-02,6215.0 1739 | 2020-10-03,3282.0 1740 | 2020-10-04,3841.0 1741 | 2020-10-05,6215.0 1742 | 2020-10-06,6146.0 1743 | 2020-10-07,6076.0 1744 | 2020-10-08,7123.0 1745 | 2020-10-09,5377.0 1746 | 2020-10-10,3771.0 1747 | 2020-10-11,3212.0 1748 | 2020-10-12,5517.0 1749 | 2020-10-13,6774.0 1750 | 2020-10-14,6844.0 1751 | 2020-10-15,7961.0 1752 | 2020-10-16,7123.0 1753 | 2020-10-17,4330.0 1754 | 2020-10-18,3631.0 1755 | 2020-10-19,6146.0 1756 | 2020-10-20,4609.0 1757 | 2020-10-21,5238.0 1758 | 2020-10-22,5308.0 1759 | 2020-10-23,5447.0 1760 | 2020-10-24,2933.0 1761 | 2020-10-25,2863.0 1762 | 2020-10-26,6285.0 1763 | 2020-10-27,5308.0 1764 | 2020-10-28,5098.0 1765 | 2020-10-29,5936.0 1766 | 2020-10-30,5587.0 1767 | 2020-10-31,3352.0 1768 | 2020-11-01,3841.0 1769 | 2020-11-02,6634.0 1770 | 2020-11-03,5727.0 1771 | 2020-11-04,5587.0 1772 | 2020-11-05,5936.0 1773 | 2020-11-06,4609.0 1774 | 2020-11-07,3143.0 1775 | 2020-11-08,2654.0 1776 | 2020-11-09,5587.0 1777 | 2020-11-10,5028.0 1778 | 2020-11-11,6006.0 1779 | 2020-11-12,12990.0 1780 | 2020-11-13,6215.0 1781 | 2020-11-14,5587.0 1782 | 2020-11-15,3282.0 1783 | 2020-11-16,6215.0 1784 | 2020-11-17,5796.0 1785 | 2020-11-18,5168.0 1786 | 2020-11-19,6146.0 1787 | 2020-11-20,6006.0 1788 | 2020-11-21,4260.0 1789 | 2020-11-22,3492.0 1790 | 2020-11-23,7333.0 1791 | 2020-11-24,5936.0 1792 | 2020-11-25,5727.0 1793 | 2020-11-26,3003.0 1794 | 2020-11-27,5168.0 1795 | 2020-11-28,5098.0 1796 | 2020-11-29,3981.0 1797 | 2020-11-30,6774.0 1798 | 2020-12-01,9288.0 1799 | 2020-12-02,7123.0 1800 | 2020-12-03,7752.0 1801 | 2020-12-04,6774.0 1802 | 2020-12-05,3422.0 1803 | 2020-12-06,3212.0 1804 | 2020-12-07,7472.0 1805 | 2020-12-08,6565.0 1806 | 2020-12-09,5447.0 1807 | 2020-12-10,6914.0 1808 | 2020-12-11,6704.0 1809 | 2020-12-12,4958.0 1810 | 2020-12-13,3562.0 1811 | 2020-12-14,6215.0 1812 | 2020-12-15,7333.0 1813 | 2020-12-16,7542.0 1814 | 2020-12-17,6774.0 1815 | 2020-12-18,6006.0 1816 | 2020-12-19,4190.0 1817 | 2020-12-20,2863.0 1818 | 2020-12-21,6076.0 1819 | 2020-12-22,6006.0 1820 | 2020-12-23,6634.0 1821 | 2020-12-24,3352.0 1822 | 2020-12-25,2235.0 1823 | 2020-12-26,3422.0 1824 | 2020-12-27,2863.0 1825 | 2020-12-28,5727.0 1826 | 2020-12-29,7123.0 1827 | 2020-12-30,17250.0 1828 | 2020-12-31,8311.0 1829 | -------------------------------------------------------------------------------- /ideation.txt: -------------------------------------------------------------------------------- 1 | 2 | ## Inspiration 3 | * https://github.com/antoinecarme/pyaf 4 | * https://github.com/AutoViML/Auto_TS 5 | * https://github.com/winedarksea/AutoTS 6 | * https://github.com/antoinecarme/pyaf 7 | * https://github.com/firmai/atspy 8 | 9 | ## Include in Forecasting: 10 | * https://github.com/facebook/prophet 11 | * https://github.com/awslabs/gluon-ts 12 | * https://github.com/alkaline-ml/pmdarima 13 | * autoarima 14 | * pytorch_forecasting 15 | * prophet 16 | * nbeats 17 | * wavenet 18 | * PyLightGBM 19 | 20 | 21 | ARIMA [14], 22 | the Holt–Winters additive model (HWAAS) [15], 23 | TBAT [16], 24 | Facebook’s Prophet [17], 25 | DeepAR [18] (as implemented in [19]) 26 | and N-Beats [20], 27 | 28 | non-parametric truncated Dirichlet Process 29 | 30 | arima prophet nbeats wavenet 31 | -------------------------------------------------------------------------------- /requirements-no-vers.txt: -------------------------------------------------------------------------------- 1 | addressable 2 | colorama 3 | fbprophet 4 | gluonts 5 | google-api-core 6 | 
google-api-python-client 7 | google-auth-httplib2 8 | google-auth 9 | holidays 10 | httplib2 11 | inspect-it 12 | keyring 13 | lightgbm 14 | matplotlib 15 | nbeats-pytorch 16 | numpy 17 | oauth2client 18 | pandas 19 | pmdarima 20 | prettytable 21 | pystan 22 | python-dateutil 23 | pyyaml 24 | requests 25 | rsa 26 | scikit-learn 27 | scipy 28 | seasonal 29 | sklearn 30 | snakify 31 | statsmodels 32 | tbats 33 | torch 34 | tqdm 35 | tsfresh 36 | werkzeug 37 | mxnet 38 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | addressable==1.4.2 2 | cachetools==4.2.0; python_version >= "3.5" and python_version < "4.0" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0") 3 | certifi==2020.12.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" 4 | cffi==1.14.4; python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "linux" or sys_platform == "linux" and python_version >= "3.6" and python_full_version >= "3.6.0" 5 | chardet==4.0.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" 6 | click==7.1.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6" 7 | cloudpickle==1.6.0; python_version >= "3.6" 8 | cmdstanpy==0.9.5; python_version >= "3" 9 | colorama==0.4.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") 10 | contextvars==2.4; python_version < "3.7" and python_version >= "3.6" 11 | convertdate==2.2.0; python_version >= "3" 12 | cryptography==3.3.1; python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "linux" or sys_platform == "linux" and python_version >= "3.6" and python_full_version >= "3.6.0" 13 | cycler==0.10.0; python_version >= "3.6" 14 | cython==0.29.17; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.6" 15 | dask==2020.12.0; python_version >= "3.6" 16 | dataclasses==0.8; python_version >= "3.6" and python_version < "3.7" and python_full_version >= "3.6.2" 17 | distributed==2020.12.0; python_version >= "3.6" 18 | docopt==0.6.2 19 | ephem==3.7.7.1; python_version >= "3" and python_version < "4" 20 | fbprophet==0.7.1; python_version >= "3" 21 | fsspec==0.8.5; python_version > "3.6" 22 | gluonts==0.6.4; python_version >= "3.6" 23 | google-api-core==1.24.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") 24 | google-api-python-client==1.12.8; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0") 25 | google-auth-httplib2==0.0.4 26 | google-auth==1.24.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") 27 | googleapis-common-protos==1.52.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" 28 | graphviz==0.8.4; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" 29 | heapdict==1.0.1; python_version >= "3.6" 30 | holidays==0.10.4 31 | httplib2==0.18.1 32 | idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" 33 | immutables==0.14; python_version < "3.7" and python_version >= "3.6" 34 | importlib-metadata==3.3.0; python_version < "3.8" and 
python_version >= "3.6" 35 | inspect-it==0.3.2 36 | jeepney==0.6.0; sys_platform == "linux" and python_version >= "3.6" 37 | jinja2==2.11.2; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" 38 | joblib>=1.2.0; python_version >= "3.6" 39 | keyring==21.8.0; python_version >= "3.6" 40 | kiwisolver==1.3.1; python_version >= "3.6" 41 | korean-lunar-calendar==0.2.1; python_version >= "3" 42 | lightgbm==3.1.1 43 | locket==0.2.0; python_version >= "3.6" 44 | lunarcalendar==0.0.9; python_version >= "3" and python_version < "4" 45 | markupsafe==1.1.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0") 46 | matplotlib==3.2.2; python_version >= "3.6" 47 | msgpack==1.0.2; python_version >= "3.6" 48 | mxnet==1.7.0.post1 49 | nbeats-pytorch==1.3.1 50 | num2words==0.5.10 51 | numpy==1.19.3; python_version >= "3.6" 52 | oauth2client==4.1.3 53 | pandas==1.0.5; python_full_version >= "3.6.1" 54 | partd==1.1.0; python_version >= "3.6" 55 | patsy==0.5.1; python_version >= "3.6" 56 | pillow==8.0.1; python_version >= "3.6" 57 | pmdarima==1.8.0; python_version >= "3.6" 58 | prettytable==2.0.0; python_version >= "3.6" 59 | protobuf==3.14.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" 60 | psutil==5.8.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" 61 | pyasn1-modules==0.2.8; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" 62 | pyasn1==0.4.8; python_version >= "3.5" and python_version < "4" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6") and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0") 63 | pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "linux" or sys_platform == "linux" and python_version >= "3.6" and python_full_version >= "3.6.0" 64 | pydantic==1.6.1; python_version >= "3.6" 65 | pylev==1.3.0 66 | pymeeus==0.3.7; python_version >= "3" 67 | pyparsing==2.4.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.6" 68 | pystan==2.19.1.1 69 | python-dateutil==2.8.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.3.0") 70 | pytz==2019.3; python_full_version >= "3.6.1" and python_version >= "3" and python_version < "4" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0") 71 | pywin32-ctypes==0.2.0; sys_platform == "win32" and python_version >= "3.6" 72 | pyyaml==5.3.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") 73 | requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") 74 | rsa==4.6; python_version >= "3.5" and python_version < "4" 75 | scikit-learn==0.24.0; python_version >= "3.6" 76 | scipy==1.5.0; python_version >= "3.6" 77 | seasonal==0.3.1 78 | secretstorage==3.3.0; sys_platform == "linux" and python_version >= "3.6" 79 | setuptools-git==1.2; python_version >= "3" 80 | six==1.15.0; python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "linux" or python_full_version >= "3.6.0" and python_version >= "3.6" and sys_platform == "linux" 81 | snakify==1.1.1 82 | sortedcontainers==2.3.0; python_version >= "3.6" 83 | 
statsmodels==0.12.1; python_version >= "3.6" 84 | tbats==1.1.0 85 | tblib==1.7.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6" 86 | threadpoolctl==2.1.0; python_version >= "3.6" 87 | toolz==0.11.1; python_version >= "3.6" 88 | torch==1.7.1; python_full_version >= "3.6.2" 89 | torchvision==0.8.2 90 | tornado==6.1 91 | tqdm==4.55.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0") 92 | tsfresh==0.17.0; python_version >= "3.5" 93 | typing-extensions==3.7.4.3; python_version < "3.8" and python_version >= "3.6" and python_full_version >= "3.6.2" 94 | ujson==1.35; python_version >= "3.6" 95 | unicode-slugify==0.1.3 96 | unidecode==1.1.2; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" 97 | uritemplate==3.0.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" 98 | urllib3==1.26.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.6" 99 | wcwidth==0.2.5; python_version >= "3.6" 100 | werkzeug==1.0.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") 101 | zict==2.0.0; python_version >= "3.6" 102 | zipp==3.4.0; python_version < "3.8" and python_version >= "3.6" 103 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import os 3 | import sys 4 | 5 | with open("requirements.txt", "r") as f: 6 | install_requires = f.read().splitlines() 7 | 8 | with open("README.md", "r", encoding="utf-8") as f: 9 | long_description = f.read() 10 | 11 | setup( 12 | name="forecastga", 13 | version="0.1.15", 14 | description="Automated Google Analytics Time Series in Python", 15 | long_description=long_description, 16 | long_description_content_type="text/markdown", 17 | url="https://github.com/jroakes/forecastga.git", 18 | author="jroakes", 19 | author_email="jroakes@gmail.com", 20 | license="MIT", 21 | packages=find_packages("src"), 22 | package_dir={"": "src"}, 23 | requires_python=">=3.6.2", 24 | install_requires=install_requires, 25 | include_package_data=True, 26 | zip_safe=False, 27 | ) 28 | -------------------------------------------------------------------------------- /src/forecastga/__init__.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # coding: utf-8 3 | 4 | 5 | """ForecastGA: Main 6 | """ 7 | 8 | 9 | __version__ = "0.1.15" 10 | 11 | 12 | from forecastga.helpers.ga_data import get_ga_data 13 | from forecastga.auto import AutomatedModel 14 | from forecastga.helpers.data import print_model_info 15 | from forecastga.helpers.colab import plot_colab 16 | 17 | 18 | def help(): 19 | print("Welcome to ForecastGA") 20 | print() 21 | print("To use:") 22 | print() 23 | print("Find Model Info:") 24 | print("forecastga.print_model_info()") 25 | print() 26 | print("Initialize Model:") 27 | print() 28 | print( 29 | """ 30 | Google Analytics: 31 | 32 | data = { 'client_id': '', 33 | 'client_secret': '', 34 | 'ga_end_date': '2019-12-31', 35 | 'ga_metric': 'sessions', 36 | 'ga_segment': 'organic traffic', 37 | 'ga_start_date': '2018-01-01', 38 | 'ga_url': 'https://analytics.google.com/analytics/web/?authuser=2#/report-home/aXXXXXwXXXXXpXXXXXX', 39 | 'identity': '', 40 | 'omit_values_over': 2000000 41 | } 42 | 43 | model_list = ["TATS", "TBATS1", "TBATP1", "TBATS2", "ARIMA"] 44 | am = forecastga.AutomatedModel(data , model_list=model_list, forecast_len=30 ) 45 | 46 | """ 47 | ) 48 | print() 49 | print( 50 | """ 51 | Pandas DataFrame: 52 | 53 | # CSV with columns: Date and Sessions 54 | df = pd.read_csv('ga_sessions.csv') 55 | df.Date = pd.to_datetime(df.Date) 56 | df = df.set_index("Date") 57 | data = df.Sessions 58 | 59 | model_list = ["TATS", "TBATS1", "TBATP1", "TBATS2", "ARIMA"] 60 | am = forecastga.AutomatedModel(data , model_list=model_list, forecast_len=30 ) 61 | 62 | """ 63 | ) 64 | print() 65 | print("Forecast Insample:") 66 | print("forecast_in, performance = am.forecast_insample()") 67 | print() 68 | print("Forecast Outsample:") 69 | print("forecast_out = am.forecast_outsample()") 70 | print() 71 | print("Ensemble Performance:") 72 | print( 73 | "all_ensemble_in, all_ensemble_out, all_performance = am.ensemble(forecast_in, forecast_out)" 74 | ) 75 | print() 76 | print("Pretty Plot in Google Colab") 77 | print( 78 | 'forecastga.plot_colab(forecast_in, title="Insample Forecast", dark_mode=True)' 79 | ) 80 | -------------------------------------------------------------------------------- /src/forecastga/auto.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # coding: utf-8 3 | 4 | 5 | """ForecastGA: AutoModel""" 6 | 7 | import importlib 8 | import warnings 9 | import torch 10 | import pandas as pd 11 | from statsmodels.tools.eval_measures import rmse 12 | 13 | from forecastga.helpers.logging import get_logger 14 | from forecastga.helpers.data import parse_data, train_test_split, select_seasonality 15 | from forecastga.ensembles import ( 16 | ensemble_lightgbm, 17 | ensemble_tsfresh, 18 | ensemble_pure, 19 | middle, 20 | ensemble_first, 21 | ensemble_doubled, 22 | ) 23 | from forecastga.models import MODELS 24 | from forecastga.helpers.ga_data import get_ga_data 25 | 26 | pd.plotting.register_matplotlib_converters() 27 | warnings.filterwarnings("ignore") 28 | 29 | _LOG = get_logger(__name__) 30 | 31 | 32 | class ModelConfig: 33 | def __init__( 34 | self, 35 | df: pd.Series, 36 | seasonality: str = "infer_from_data", 37 | forecast_len: int = 20, 38 | GPU: bool = torch.cuda.is_available(), 39 | ): 40 | 41 | self.df = df 42 | self.seasonality = seasonality 43 | self.forecast_len = forecast_len 44 | self.GPU = GPU 45 | 46 | self.in_sample = None 47 | self.train_df = None 48 | self.forecast_df = None 49 | self.seasons = None 50 | self.periods = None 51 | 52 | self.dataframe, self.freq = parse_data(df) 53 | 54 | def set_in_sample(self): 55 | 56 | self.in_sample = True 57 | 58 | self.train_df, self.forecast_df = train_test_split( 59 | self.dataframe, forecast_len=self.forecast_len 60 | ) 61 | self.forecast_len = len(self.forecast_df) 62 | 63 | self.seasons = select_seasonality(self.train_df, self.seasonality) 64 | self.periods = select_seasonality(self.train_df, "periodocity") 65 | 66 | def set_out_sample(self): 67 | 68 | self.in_sample = False 69 | 70 | self.train_df, self.forecast_df = self.dataframe["Target"], None 71 | 72 | self.seasons = select_seasonality(self.train_df, self.seasonality) 73 | self.periods = select_seasonality(self.train_df, "periodocity") 74 | 75 | 76 | class AutomatedModel: 77 | def __init__( 78 | self, 79 | data, 80 | model_list: list = [], 81 | seasonality: str = "infer_from_data", 82 | forecast_len: int = 20, 83 | GPU: bool = torch.cuda.is_available(), 84 | ): 85 | 86 | if isinstance(data, dict): 87 | self.df = get_ga_data(data) 88 | elif isinstance(data, pd.Series): 89 | self.df = data 90 | else: 91 | raise AttributeError( 92 | "First parameter must be a dict with GA property and time data, or a pandas series." 
93 | ) 94 | self.model_list = model_list 95 | self.seasonality = seasonality 96 | self.forecast_len = forecast_len 97 | self.GPU = GPU 98 | self.models_dict = {} 99 | self.forecast_dict = {} 100 | 101 | self.config: ModelConfig = ModelConfig( 102 | self.df, 103 | seasonality=seasonality, 104 | forecast_len=forecast_len, 105 | GPU=GPU, 106 | ) 107 | 108 | def forecast_insample(self, **kwargs): 109 | 110 | self.config.set_in_sample() 111 | 112 | self.models_dict = self.__train_models(**kwargs) 113 | self.forecast_dict = self.__forecast_models() 114 | forecast_frame = self.forecast_dataframe( 115 | self.config.forecast_df, self.forecast_dict 116 | ) 117 | preformance = self.insample_performance(forecast_frame) 118 | 119 | _LOG.info("Successfully finished in sample forecast") 120 | 121 | return forecast_frame, preformance 122 | 123 | def forecast_outsample(self, **kwargs): 124 | 125 | self.config.set_out_sample() 126 | 127 | self.models_dict = self.__train_models(**kwargs) 128 | self.forecast_dict = self.__forecast_models() 129 | 130 | future_index = pd.date_range( 131 | self.config.dataframe.index[-1], 132 | periods=self.config.forecast_len + 1, 133 | freq=self.config.freq, 134 | )[1:] 135 | 136 | forecast_frame = self.forecast_dataframe( 137 | pd.Series(0, name="Target", index=future_index), self.forecast_dict 138 | ) 139 | 140 | _LOG.info("Successfully finished out of sample forecast") 141 | 142 | return forecast_frame 143 | 144 | def available_models(self): 145 | return [k for k, v in MODELS.items() if v["status"] == "active"] 146 | 147 | def __train_models(self, **kwargs): 148 | 149 | models_dict = {} 150 | 151 | for model_name in self.model_list: 152 | if model_name not in self.available_models(): 153 | _LOG.warning( 154 | "Model {} is not available. 
Skipping...".format(model_name) 155 | ) 156 | continue 157 | 158 | _LOG.info( 159 | "Model {} is being loaded and trained for {} prediction".format( 160 | model_name, 161 | "in sample" if self.config.in_sample else "out of sample", 162 | ) 163 | ) 164 | model_data = MODELS[model_name] 165 | module = importlib.import_module(model_data["loc"]) 166 | module_class = getattr(module, model_data["class"]) 167 | model = module_class(self.config) 168 | 169 | model.train(**kwargs) 170 | 171 | models_dict[model_name] = model 172 | 173 | return models_dict 174 | 175 | def __forecast_models(self, models_dict=None): 176 | 177 | models_dict = models_dict or self.models_dict 178 | 179 | forecast_dict = {} 180 | for model_name, model in models_dict.items(): 181 | 182 | _LOG.info( 183 | "Model {} is being used to forecast {}".format( 184 | model_name, 185 | "in sample" if self.config.in_sample else "out of sample", 186 | ) 187 | ) 188 | 189 | model.forecast() 190 | 191 | forecast_dict[model_name] = model.prediction 192 | 193 | return forecast_dict 194 | 195 | @staticmethod 196 | def forecast_dataframe(df, forecast_dict): 197 | insample = df.to_frame() 198 | for name, forecast in forecast_dict.items(): 199 | insample[name] = forecast 200 | return insample 201 | 202 | @staticmethod 203 | def insample_performance(forecast_frame, as_dict=False): 204 | 205 | dict_perf = {} 206 | for col, _ in forecast_frame.iteritems(): 207 | dict_perf[col] = {} 208 | dict_perf[col]["rmse"] = rmse(forecast_frame["Target"], forecast_frame[col]) 209 | dict_perf[col]["mse"] = dict_perf[col]["rmse"] ** 2 210 | dict_perf[col]["mean"] = forecast_frame[col].mean() 211 | if as_dict: 212 | return dict_perf 213 | 214 | return pd.DataFrame.from_dict(dict_perf) 215 | 216 | def ensemble(self, forecast_in, forecast_out): 217 | 218 | # TODO: Need to move `freq` to global model config. 219 | _LOG.error('Ensemble Not Final: Need to move `freq` to global model config.') 220 | _LOG.error('Returning `None` values.') 221 | return None, None, None 222 | 223 | season = self.seasonality 224 | 225 | _LOG.info("Building LightGBM Ensemble from TS data (ensemble_lgb)") 226 | 227 | ensemble_lgb_in, ensemble_lgb_out = ensemble_lightgbm( 228 | forecast_in, forecast_out, self.freq 229 | ) 230 | 231 | _LOG.info( 232 | "Building LightGBM Ensemble from PCA reduced TSFresh Features (ensemble_ts). This can take a long time." 233 | ) 234 | 235 | ensemble_ts_in, ensemble_ts_out = ensemble_tsfresh( 236 | forecast_in, forecast_out, season, self.freq 237 | ) 238 | 239 | _LOG.info("Building Standard First Level Ensemble") 240 | df_ensemble_in, df_ensemble_out = ensemble_pure(forecast_in, forecast_out) 241 | middle_out = middle(ensemble_lgb_out, ensemble_ts_out, df_ensemble_out) 242 | middle_in = middle(ensemble_lgb_in, ensemble_ts_in, df_ensemble_in) 243 | 244 | _LOG.info("Building Final Multi-level Ensemble") 245 | middle_in, _ = ensemble_first(middle_in, forecast_in) 246 | all_ensemble_in, all_ensemble_out, all_performance = ensemble_doubled( 247 | middle_in, middle_out, forecast_in, forecast_out 248 | ) 249 | 250 | return all_ensemble_in, all_ensemble_out, all_performance.T.sort_values("rmse") 251 | -------------------------------------------------------------------------------- /src/forecastga/ensembles.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | """ForecastGA: Ensembles""" 5 | 6 | import pandas as pd 7 | from sklearn.model_selection import train_test_split as tts 8 | from sklearn.decomposition import PCA 9 | from statsmodels.tools.eval_measures import rmse 10 | from tsfresh.utilities.dataframe_functions import impute, roll_time_series 11 | from tsfresh import extract_features 12 | from tsfresh import select_features 13 | import lightgbm as lgb 14 | 15 | from forecastga.helpers.logging import get_logger 16 | from forecastga.helpers.data import constant_feature_detect 17 | 18 | _LOG = get_logger(__name__) 19 | 20 | 21 | def ensemble_performance(forecasts): 22 | dict_perf = {} 23 | for col, _ in forecasts.iteritems(): 24 | dict_perf[col] = {} 25 | dict_perf[col]["rmse"] = rmse(forecasts["Target"], forecasts[col]) 26 | dict_perf[col]["mse"] = dict_perf[col]["rmse"] ** 2 27 | dict_perf[col]["mean"] = forecasts[col].mean() 28 | return pd.DataFrame.from_dict(dict_perf) 29 | 30 | 31 | def time_feature(df, perd): 32 | if perd in ["MS", "M", "BM", "BMS"]: 33 | df["month"] = df.index.month 34 | elif perd in ["BH", "H"]: 35 | df["hour"] = df.index.hour 36 | elif perd == "B": 37 | df["dayofweek"] = df.index.dayofweek 38 | elif perd == "D": 39 | df["dayofweek"] = df.index.dayofweek 40 | elif perd in ["W", "W-SUN", "W-MON", "W-TUE", "W-WED", "W-THU", "W-FRI", "W-SAT"]: 41 | df["week"] = df.index.week 42 | elif perd in ["Q", "QS", "BQ", "BQS"]: 43 | df["quarter"] = df.index.quarter 44 | elif perd in ["T", "min"]: 45 | df["minute"] = df.index.minute 46 | elif perd == "S": 47 | df["second"] = df.index.second 48 | # elif perd in ["L","ms"]: 49 | # periodocity = 1000 50 | # elif perd in ["U","us"]: 51 | # periodocity = 1000 52 | # elif perd=="N": 53 | # periodocity = 1000 54 | return df 55 | 56 | 57 | def ensemble_lightgbm(forecast_in, forecast_out, pred): 58 | 59 | forecast_in_copy = forecast_in.copy() 60 | 61 | forecast_in_copy = time_feature(forecast_in_copy, pred) 62 | forecast_in_copy["mean"] = forecast_in_copy.drop(["Target"], axis=1).mean(axis=1) 63 | forecast_train, forecast_test = tts( 64 | forecast_in_copy, train_size=0.5, shuffle=False, stratify=None 65 | ) 66 | 67 | target = "Target" 68 | d_train = lgb.Dataset( 69 | forecast_train.drop(columns=[target]), label=forecast_train[target] 70 | ) 71 | 72 | params = { 73 | "boosting_type": "gbdt", 74 | "objective": "regression", 75 | "metric": "rmsle", 76 | "max_depth": 6, 77 | "learning_rate": 0.1, 78 | "verbose": 0, 79 | "num_threads": 16, 80 | } 81 | 82 | model = lgb.train(params, d_train, 100, verbose_eval=1) 83 | 84 | ensemble_lgb = pd.DataFrame(index=forecast_test.index) 85 | 86 | ensemble_lgb["ensemble_lgb"] = model.predict(forecast_test.drop(columns=[target])) 87 | 88 | ensemble_lgb_out = pd.DataFrame(index=forecast_out.index) 89 | 90 | ensemble_lgb_out["ensemble_lgb"] = model.predict(forecast_out) 91 | 92 | return ensemble_lgb, ensemble_lgb_out 93 | 94 | 95 | def ensemble_tsfresh(forecast_in, forecast_out, season, perd): 96 | """ 97 | Create rolled time series for ts feature extraction 98 | """ 99 | 100 | def tsfresh_run(forecast, season, insample=True, forecast_out=None): 101 | df_roll_prep = forecast.reset_index() 102 | if insample: 103 | df_roll_prep = df_roll_prep.drop(["Target", "Date"], axis=1) 104 | df_roll_prep["id"] = 1 105 | target = forecast["Target"] 106 | else: 107 | df_roll_prep = df_roll_prep.drop(["index"], axis=1) 108 | df_roll_prep["id"] = 1 109 | 110 | df_roll = roll_time_series( 111 | df_roll_prep, 112 | 
column_id="id", 113 | column_sort=None, 114 | column_kind=None, 115 | rolling_direction=1, 116 | max_timeshift=season - 1, 117 | ) 118 | counts = df_roll["id"].value_counts() 119 | df_roll_cut = df_roll[df_roll["id"].isin(counts[counts >= season].index)] 120 | 121 | # TS feature extraction 122 | concat_df = pd.DataFrame() 123 | 124 | concat_df = extract_features( 125 | df_roll_cut.ffill(), 126 | column_id="id", 127 | column_sort="sort", 128 | n_jobs=season, 129 | show_warnings=False, 130 | disable_progressbar=True, 131 | ) 132 | 133 | if insample: 134 | 135 | concat_df = concat_df.dropna(axis=1, how="all") 136 | concat_df.index = ( 137 | target[df_roll_cut["id"].value_counts().index] 138 | .sort_index() 139 | .to_frame() 140 | .index 141 | ) 142 | concat_df = pd.merge( 143 | target[df_roll_cut["id"].value_counts().index].sort_index().to_frame(), 144 | concat_df, 145 | left_index=True, 146 | right_index=True, 147 | how="left", 148 | ) 149 | concat_df_list = constant_feature_detect(data=concat_df, threshold=0.95) 150 | concat_df = concat_df.drop(concat_df_list, axis=1) 151 | else: 152 | forecast_out.index.name = "Date" 153 | concat_df.index = forecast_out.index 154 | 155 | concat_df = impute(concat_df) 156 | 157 | return concat_df 158 | 159 | _LOG.info("LightGBM ensemble have been successfully built") 160 | 161 | concat_df_drop_in = tsfresh_run(forecast_in, season, insample=True) 162 | 163 | extracted_n_selected = select_features( 164 | concat_df_drop_in.drop("Target", axis=1), 165 | concat_df_drop_in["Target"], 166 | fdr_level=0.01, 167 | n_jobs=12, 168 | ) # fdr is the significance level. 169 | 170 | forecast_out_add = pd.concat( 171 | (forecast_in.iloc[-season + 1 :, :].drop(["Target"], axis=1), forecast_out), 172 | axis=0, 173 | ) 174 | concat_df_drop_out = tsfresh_run( 175 | forecast_out_add, season, insample=False, forecast_out=forecast_out 176 | ) 177 | extracted_n_selected_out = concat_df_drop_out[extracted_n_selected.columns] 178 | 179 | # Reduce the dimensions of generated time series features 180 | pca2 = PCA(n_components=8) 181 | pca2.fit(extracted_n_selected) 182 | pca2_results_in = pca2.transform(extracted_n_selected) 183 | pca2_results_out = pca2.transform(extracted_n_selected_out) 184 | 185 | cols = 0 186 | for i in range(pca2_results_in.shape[1]): 187 | cols = cols + 1 188 | extracted_n_selected["pca_" + str(i)] = pca2_results_in[:, i] 189 | extracted_n_selected_out["pca_" + str(i)] = pca2_results_out[:, i] 190 | 191 | df = forecast_in.iloc[season - 1 :, :].copy() 192 | df = time_feature(df, perd) 193 | df["mean"] = df.drop(["Target"], axis=1).mean(axis=1) 194 | 195 | df_new = pd.concat( 196 | (df.reset_index(), extracted_n_selected.iloc[:, -cols:].reset_index(drop=True)), 197 | axis=1, 198 | ) 199 | df_new = df_new.set_index("Date") 200 | forecast_train, forecast_test = tts( 201 | df_new, train_size=0.5, shuffle=False, stratify=None 202 | ) 203 | target = "Target" 204 | d_train = lgb.Dataset( 205 | forecast_train.drop(columns=[target]), label=forecast_train[target] 206 | ) 207 | 208 | params = { 209 | "boosting_type": "gbdt", 210 | "objective": "regression", 211 | "metric": "rmsle", 212 | "max_depth": 6, 213 | "learning_rate": 0.1, 214 | "verbose": 0, 215 | "num_threads": 16, 216 | } 217 | 218 | model = lgb.train(params, d_train, 100, verbose_eval=1) 219 | 220 | ensemble_ts = pd.DataFrame(index=forecast_test.index) 221 | 222 | ensemble_ts["ensemble_ts"] = model.predict(forecast_test.drop(columns=[target])) 223 | 224 | df_out = forecast_out.copy() 225 | df_out = 
time_feature(df_out, perd) 226 | df_out["mean"] = df_out.mean(axis=1) 227 | 228 | ensemble_ts_out = pd.DataFrame(index=df_out.index) 229 | ensemble_ts_out["ensemble_ts"] = model.predict(df_out) 230 | 231 | _LOG.info("LightGBM ensemble have been successfully built") 232 | 233 | return ensemble_ts, ensemble_ts_out 234 | 235 | 236 | def ensemble_pure(forecast_in, forecast_out): 237 | """ 238 | Pure Emsemble 239 | """ 240 | 241 | df_perf = ensemble_performance(forecast_in).drop("Target", axis=1) 242 | 243 | def run_ensemble(df_perf, forecast): 244 | 245 | many = len(df_perf.iloc[0, :].sort_values()) 246 | 247 | # Note these can fail, should see if that many indices actually exists. 248 | df_ensemble = pd.DataFrame(index=forecast.index) 249 | if many == 1: 250 | ValueError("You need more than one model to ensemble.") 251 | if many >= 2: 252 | df_ensemble[ 253 | "_".join(list(df_perf.iloc[0, :].sort_values()[:2].index.values)) 254 | ] = forecast[list(df_perf.iloc[0, :].sort_values()[:2].index)].mean(axis=1) 255 | if many >= 3: 256 | df_ensemble[ 257 | "_".join(list(df_perf.iloc[0, :].sort_values()[:3].index.values)) 258 | ] = forecast[list(df_perf.iloc[0, :].sort_values()[:3].index)].mean(axis=1) 259 | if many >= 4: 260 | df_ensemble[ 261 | "_".join(list(df_perf.iloc[0, :].sort_values()[:4].index.values)) 262 | ] = forecast[list(df_perf.iloc[0, :].sort_values()[:4].index)].mean(axis=1) 263 | 264 | return df_ensemble 265 | 266 | df_ensemble_in = run_ensemble(df_perf, forecast_in) 267 | df_ensemble_out = run_ensemble(df_perf, forecast_out) 268 | 269 | return df_ensemble_in, df_ensemble_out 270 | 271 | 272 | def middle(ensemble_lgb, ensemble_ts, pure_ensemble): 273 | first_merge = pd.merge( 274 | ensemble_ts, ensemble_lgb, left_index=True, right_index=True, how="left" 275 | ) 276 | second_merge = pd.merge( 277 | first_merge, pure_ensemble, left_index=True, right_index=True, how="left" 278 | ) 279 | return second_merge 280 | 281 | 282 | def ensemble_first(middle_in, forecast_in): 283 | third_merge = pd.merge( 284 | middle_in, 285 | forecast_in[["Target"]], 286 | left_index=True, 287 | right_index=True, 288 | how="left", 289 | ) 290 | return third_merge, ensemble_performance(third_merge).drop("Target", axis=1) 291 | 292 | 293 | def ensemble_doubled(middle_in, middle_out, forecast_in, forecast_out): 294 | 295 | third_merge_in = pd.merge( 296 | middle_in.drop(["Target"], axis=1), 297 | forecast_in, 298 | left_index=True, 299 | right_index=True, 300 | how="left", 301 | ) 302 | third_merge_out = pd.merge( 303 | middle_out, forecast_out, left_index=True, right_index=True, how="left" 304 | ) 305 | 306 | # Double Ensemble 307 | df_perf = ensemble_performance(third_merge_in).drop("Target", axis=1) 308 | 309 | def inner_ensemble(df_perf, third_merge): 310 | df_ensemble = pd.DataFrame(index=third_merge.index) 311 | # Note these can fail, should see if that many indices actually exists. 
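# What follows averages the predictions of the N lowest-RMSE columns
# (row 0 of `df_perf` holds each column's RMSE, so `sort_values()` puts
# the best models first) for N in {2, 3, 5, 7}, naming each averaged
# column by joining its member names with "__X__". Note that the bare
# `ValueError(...)` below is constructed but never actually raised.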
312 | 313 | many = len(df_perf.iloc[0, :].sort_values()) 314 | 315 | if many == 1: 316 | ValueError("You need more than one model to ensemble.") 317 | if many >= 2: 318 | df_ensemble[ 319 | "__X__".join(list(df_perf.iloc[0, :].sort_values()[:2].index.values)) 320 | ] = third_merge[list(df_perf.iloc[0, :].sort_values()[:2].index)].mean( 321 | axis=1 322 | ) 323 | if many >= 3: 324 | df_ensemble[ 325 | "__X__".join(list(df_perf.iloc[0, :].sort_values()[:3].index.values)) 326 | ] = third_merge[list(df_perf.iloc[0, :].sort_values()[:3].index)].mean( 327 | axis=1 328 | ) 329 | if many >= 5: 330 | df_ensemble[ 331 | "__X__".join(list(df_perf.iloc[0, :].sort_values()[:5].index.values)) 332 | ] = third_merge[list(df_perf.iloc[0, :].sort_values()[:5].index)].mean( 333 | axis=1 334 | ) 335 | if many >= 7: 336 | df_ensemble[ 337 | "__X__".join(list(df_perf.iloc[0, :].sort_values()[:7].index.values)) 338 | ] = third_merge[list(df_perf.iloc[0, :].sort_values()[:7].index)].mean( 339 | axis=1 340 | ) 341 | return df_ensemble 342 | 343 | df_ensembled_in = inner_ensemble(df_perf, third_merge_in) 344 | df_ensembled_out = inner_ensemble(df_perf, third_merge_out) 345 | 346 | last_merge_in = pd.merge( 347 | third_merge_in, df_ensembled_in, left_index=True, right_index=True, how="left" 348 | ) # .drop(["month","mean"],axis=1) 349 | last_merge_out = pd.merge( 350 | third_merge_out, df_ensembled_out, left_index=True, right_index=True, how="left" 351 | ) 352 | 353 | df_perf_last = ensemble_performance(last_merge_in).drop("Target", axis=1) 354 | 355 | return last_merge_in, last_merge_out, df_perf_last 356 | -------------------------------------------------------------------------------- /src/forecastga/ga/__init__.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ Google Analytics Package""" 3 | 4 | from . import ( 5 | auth, 6 | utils, 7 | account, 8 | blueprint, 9 | columns, 10 | errors, 11 | query, 12 | segments, 13 | ) 14 | from .auth import authenticate, authorize, revoke 15 | from .blueprint import Blueprint 16 | -------------------------------------------------------------------------------- /src/forecastga/ga/account.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | """Google Analytics Account""" 4 | 5 | import yaml 6 | import addressable 7 | 8 | from . import utils 9 | from . import query 10 | from . import columns 11 | from .columns import Column, Segment, ColumnList, SegmentList 12 | 13 | 14 | class Account: 15 | """ 16 | An account is usually but not always associated with a single 17 | website. It will often contain multiple web properties 18 | (different parts of your website that you've configured 19 | Google Analytics to analyze separately, or simply the default 20 | web property that every website has in Google Analytics), 21 | which in turn will have one or more profiles. 22 | 23 | You should navigate to a profile to run queries. 
24 | 25 | ```python 26 | import googleanalytics as ga 27 | accounts = ga.authenticate() 28 | profile = accounts['debrouwere.org'].webproperties['UA-12933299-1'].profiles['debrouwere.org'] 29 | report = profile.core.query('pageviews').range('2014-10-01', '2014-10-31').get() 30 | print(report['pageviews']) 31 | ``` 32 | """ 33 | 34 | def __init__(self, raw, service, credentials): 35 | self.service = service 36 | self.credentials = credentials 37 | self.raw = raw 38 | self.id = raw["id"] 39 | self.name = raw["name"] 40 | self.permissions = raw["permissions"]["effective"] 41 | 42 | @property 43 | @utils.memoize 44 | def webproperties(self): 45 | """ 46 | A list of all web properties on this account. You may 47 | select a specific web property using its name, its id 48 | or an index. 49 | 50 | ```python 51 | account.webproperties[0] 52 | account.webproperties['UA-9234823-5'] 53 | account.webproperties['debrouwere.org'] 54 | ``` 55 | """ 56 | 57 | raw_properties = ( 58 | self.service.management() 59 | .webproperties() 60 | .list(accountId=self.id) 61 | .execute()["items"] 62 | ) 63 | _webproperties = [WebProperty(raw, self) for raw in raw_properties] 64 | return addressable.List( 65 | _webproperties, indices=["id", "name"], insensitive=True 66 | ) 67 | 68 | @property 69 | def query(self, *vargs, **kwargs): 70 | """ A shortcut to the first profile of the first webproperty. """ 71 | return self.webproperties[0].query(*vargs, **kwargs) 72 | 73 | def __repr__(self): 74 | return "".format( 75 | self.name, self.id 76 | ) 77 | 78 | 79 | class WebProperty: 80 | """ 81 | A web property is a particular website you're tracking in Google Analytics. 82 | It has one or more profiles, and you will need to pick one from which to 83 | launch your queries. 84 | """ 85 | 86 | def __init__(self, raw, account): 87 | self.account = account 88 | self.raw = raw 89 | self.id = raw["id"] 90 | self.name = raw["name"] 91 | # on rare occassions, e.g. for abandoned web properties, 92 | # a website url might not be present 93 | self.url = raw.get("websiteUrl") 94 | 95 | @property 96 | def profile(self): 97 | default = self.raw["defaultProfileId"] 98 | return self.profiles[default] 99 | 100 | @property 101 | @utils.memoize 102 | def profiles(self): 103 | """ 104 | A list of all profiles on this web property. You may 105 | select a specific profile using its name, its id 106 | or an index. 107 | 108 | ```python 109 | property.profiles[0] 110 | property.profiles['9234823'] 111 | property.profiles['marketing profile'] 112 | ``` 113 | """ 114 | raw_profiles = ( 115 | self.account.service.management() 116 | .profiles() 117 | .list(accountId=self.account.id, webPropertyId=self.id) 118 | .execute()["items"] 119 | ) 120 | profiles = [Profile(raw, self) for raw in raw_profiles] 121 | return addressable.List(profiles, indices=["id", "name"], insensitive=True) 122 | 123 | def query(self, *vargs, **kwargs): 124 | """ 125 | A shortcut to the first profile of this webproperty. 126 | """ 127 | return self.profiles[0].query(*vargs, **kwargs) 128 | 129 | def __repr__(self): 130 | return "".format( 131 | self.name, self.id 132 | ) 133 | 134 | 135 | class Profile: 136 | """ 137 | A profile is a particular analytics configuration of a web property. 138 | Each profile belongs to a web property and an account. As all 139 | queries using the Google Analytics API run against a particular 140 | profile, queries can only be created from a `Profile` object. 
141 | 142 | ```python 143 | profile.query('pageviews').range('2014-01-01', days=7).get() 144 | ``` 145 | """ 146 | 147 | def __init__(self, raw, webproperty): 148 | self.raw = raw 149 | self.webproperty = webproperty 150 | self.account = webproperty.account 151 | self.id = raw["id"] 152 | self.name = raw["name"] 153 | self.core = CoreReportingAPI(self) 154 | self.realtime = RealTimeReportingAPI(self) 155 | 156 | def __repr__(self): 157 | return "".format( 158 | self.name, self.id 159 | ) 160 | 161 | 162 | class ReportingAPI: 163 | REPORT_TYPES = { 164 | "ga": "ga", 165 | "realtime": "rt", 166 | } 167 | 168 | QUERY_TYPES = { 169 | "ga": query.CoreQuery, 170 | "realtime": query.RealTimeQuery, 171 | } 172 | 173 | def __init__(self, endpoint, profile): 174 | """ 175 | Endpoint can be one of `ga` or `realtime`. 176 | """ 177 | 178 | # various shortcuts 179 | self.profile = profile 180 | self.account = profile.account 181 | self.service = service = profile.account.service 182 | root = service.data() 183 | self.endpoint_type = endpoint 184 | self.endpoint = getattr(root, endpoint)() 185 | 186 | # query interface 187 | self.report_type = self.REPORT_TYPES[endpoint] 188 | query_class = self.QUERY_TYPES[endpoint] 189 | self.query = query_class(self) 190 | 191 | # optional caching layer 192 | self.cache = None 193 | 194 | @property 195 | @utils.memoize 196 | def all_columns(self): 197 | query_service = ( 198 | self.service.metadata().columns().list(reportType=self.report_type) 199 | ) 200 | raw_columns = query_service.execute()["items"] 201 | hydrated_columns = utils.flatten(map(Column.from_metadata, raw_columns)) 202 | return ColumnList(hydrated_columns, unique=False) 203 | 204 | @property 205 | @utils.memoize 206 | def columns(self): 207 | return addressable.filter(columns.is_supported, self.all_columns) 208 | 209 | @property 210 | @utils.memoize 211 | def segments(self): 212 | query_service = self.service.management().segments().list() 213 | raw_segments = query_service.execute()["items"] 214 | hydrated_segments = [Segment(raw, self) for raw in raw_segments] 215 | return SegmentList(hydrated_segments) 216 | 217 | @property 218 | @utils.memoize 219 | def metrics(self): 220 | return addressable.filter(columns.is_metric, self.columns) 221 | 222 | @property 223 | @utils.memoize 224 | def dimensions(self): 225 | return addressable.filter(columns.is_dimension, self.columns) 226 | 227 | @property 228 | @utils.memoize 229 | def goals(self): 230 | raise NotImplementedError() 231 | 232 | def __repr__(self): 233 | return "".format(self.__class__.__name__) 234 | 235 | 236 | class CoreReportingAPI(ReportingAPI): 237 | def __init__(self, profile): 238 | super(CoreReportingAPI, self).__init__("ga", profile) 239 | 240 | 241 | class RealTimeReportingAPI(ReportingAPI): 242 | def __init__(self, profile): 243 | super(RealTimeReportingAPI, self).__init__("realtime", profile) 244 | 245 | # in principle, we should be able to reuse everything from the ReportingAPI 246 | # base class, but the Real Time Reporting API is still in beta and some 247 | # things – like a metadata endpoint – are missing. 
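# As a stopgap, the realtime column metadata is therefore loaded from the
# bundled `realtime.yml` file (see `all_columns` below) rather than fetched
# from the live metadata endpoint the way `ReportingAPI.all_columns` does.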
248 | @property 249 | @utils.memoize 250 | def all_columns(self): 251 | raw_columns = yaml.load(open(utils.here("realtime.yml"))) 252 | hydrated_columns = utils.flatten(map(Column.from_metadata, raw_columns)) 253 | return ColumnList(hydrated_columns) 254 | -------------------------------------------------------------------------------- /src/forecastga/ga/auth/__init__.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | """ 4 | Convenience functions for authenticating with Google 5 | and asking for authorization with Google, with 6 | `authenticate` at its core. 7 | 8 | `authenticate` will do what it says on the tin, but unlike 9 | the basic `googleanalytics.oauth.authenticate`, it also tries 10 | to get existing credentials from the keyring, from environment 11 | variables, it prompts for information when required and so on. 12 | """ 13 | import re 14 | 15 | from . import oauth 16 | from .oauth import Flow, Credentials 17 | 18 | 19 | def navigate( 20 | accounts, 21 | account=None, 22 | webproperty=None, 23 | profile=None, 24 | ga_url=None, 25 | default_profile=True, 26 | ): 27 | 28 | if ga_url: 29 | return get_profile_from_url(accounts, ga_url) 30 | 31 | if webproperty and not account: 32 | raise KeyError( 33 | "Cannot navigate to a webproperty or profile without knowing the account." 34 | ) 35 | if profile and not (webproperty and account): 36 | raise KeyError( 37 | "Cannot navigate to a profile without knowing account and webproperty." 38 | ) 39 | 40 | if profile: 41 | return accounts[account].webproperties[webproperty].profiles[profile] 42 | elif webproperty: 43 | scope = accounts[account].webproperties[webproperty] 44 | if default_profile: 45 | return scope.profile 46 | else: 47 | return scope 48 | elif account: 49 | return accounts[account] 50 | else: 51 | return accounts 52 | 53 | 54 | def get_profile_from_url(accounts, ga_url): 55 | 56 | if isinstance(ga_url, str) and "https://analytics.google.com/" in ga_url: 57 | 58 | psearch = re.search( 59 | "^https:\/\/analytics\.google\.com\/analytics\/web\/.*\/a(?P[0-9]+)w(?P[0-9]+)p(?P

[0-9]+).*$", 60 | str(ga_url), 61 | re.IGNORECASE, 62 | ) 63 | 64 | if len(psearch.groups()) == 3: 65 | return get_profile(accounts, psearch["a"], psearch["w"], psearch["p"]) 66 | 67 | else: 68 | error = "The URL was not correct. it should include a portion matching `/a23337837w45733833p149423361/`" 69 | 70 | else: 71 | error = "The url provided should start with `https://analytics.google.com\/`" 72 | 73 | raise KeyError(error) 74 | 75 | 76 | def get_profile(accounts, account, webproperty, profile): 77 | 78 | try: 79 | 80 | account = accounts[account] 81 | webproperty = [ 82 | w 83 | for w in account.webproperties 84 | if w.raw["internalWebPropertyId"] == webproperty 85 | ][0] 86 | profile = webproperty.profiles[profile] 87 | 88 | return profile 89 | 90 | except Exception as e: 91 | print("Unknown Exception:", str(e)) 92 | return None 93 | 94 | 95 | def find(**kwargs): 96 | return oauth.Credentials.find(**kwargs) 97 | 98 | 99 | def identity(name): 100 | return find(identity=name) 101 | 102 | 103 | def authenticate( 104 | client_id=None, 105 | client_secret=None, 106 | client_email=None, 107 | private_key=None, 108 | access_token=None, 109 | refresh_token=None, 110 | account=None, 111 | webproperty=None, 112 | profile=None, 113 | ga_url=None, 114 | identity=None, 115 | prefix=None, 116 | suffix=None, 117 | interactive=False, 118 | save=False, 119 | ): 120 | """ 121 | The `authenticate` function will authenticate the user with the Google Analytics API, 122 | using a variety of strategies: keyword arguments provided to this function, credentials 123 | stored in in environment variables, credentials stored in the keychain and, finally, by 124 | asking for missing information interactively in a command-line prompt. 125 | 126 | If necessary (but only if `interactive=True`) this function will also allow the user 127 | to authorize this Python module to access Google Analytics data on their behalf, 128 | using an OAuth2 token. 129 | """ 130 | 131 | credentials = oauth.Credentials.find( 132 | valid=True, 133 | interactive=interactive, 134 | prefix=prefix, 135 | suffix=suffix, 136 | client_id=client_id, 137 | client_secret=client_secret, 138 | client_email=client_email, 139 | private_key=private_key, 140 | access_token=access_token, 141 | refresh_token=refresh_token, 142 | identity=identity, 143 | ) 144 | 145 | if credentials.incomplete: 146 | if interactive: 147 | credentials = authorize( 148 | client_id=credentials.client_id, 149 | client_secret=credentials.client_secret, 150 | save=save, 151 | identity=credentials.identity, 152 | prefix=prefix, 153 | suffix=suffix, 154 | ) 155 | elif credentials.type == 2: 156 | credentials = authorize( 157 | client_email=credentials.client_email, 158 | private_key=credentials.private_key, 159 | identity=credentials.identity, 160 | save=save, 161 | ) 162 | else: 163 | raise KeyError( 164 | "Cannot authenticate: enable interactive authorization, pass a token or use a service account." 
165 | ) 166 | 167 | accounts = oauth.authenticate(credentials) 168 | scope = navigate( 169 | accounts, 170 | account=account, 171 | webproperty=webproperty, 172 | profile=profile, 173 | ga_url=ga_url, 174 | ) 175 | return scope 176 | 177 | 178 | def authorize( 179 | client_id=None, 180 | client_secret=None, 181 | client_email=None, 182 | private_key=None, 183 | save=False, 184 | identity=None, 185 | prefix=None, 186 | suffix=None, 187 | ): 188 | base_credentials = oauth.Credentials.find( 189 | valid=True, 190 | interactive=True, 191 | identity=identity, 192 | client_id=client_id, 193 | client_secret=client_secret, 194 | client_email=client_email, 195 | private_key=private_key, 196 | prefix=prefix, 197 | suffix=suffix, 198 | ) 199 | 200 | if base_credentials.incomplete: 201 | credentials = oauth.authorize( 202 | base_credentials.client_id, base_credentials.client_secret 203 | ) 204 | credentials.identity = base_credentials.identity 205 | else: 206 | credentials = base_credentials 207 | 208 | return credentials 209 | 210 | 211 | def revoke( 212 | client_id, 213 | client_secret, 214 | client_email=None, 215 | private_key=None, 216 | access_token=None, 217 | refresh_token=None, 218 | identity=None, 219 | prefix=None, 220 | suffix=None, 221 | ): 222 | 223 | """ 224 | Given a client id, client secret and either an access token or a refresh token, 225 | revoke OAuth access to the Google Analytics data and remove any stored credentials 226 | that use these tokens. 227 | """ 228 | 229 | if client_email and private_key: 230 | raise ValueError("Two-legged OAuth does not use revokable tokens.") 231 | 232 | credentials = oauth.Credentials.find( 233 | complete=True, 234 | interactive=False, 235 | identity=identity, 236 | client_id=client_id, 237 | client_secret=client_secret, 238 | access_token=access_token, 239 | refresh_token=refresh_token, 240 | prefix=prefix, 241 | suffix=suffix, 242 | ) 243 | 244 | retval = credentials.revoke() 245 | return retval 246 | -------------------------------------------------------------------------------- /src/forecastga/ga/auth/credentials.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | import os 4 | from copy import copy 5 | 6 | import httplib2 7 | import oauth2client 8 | import inspector 9 | 10 | from .. 
import utils 11 | 12 | 13 | def from_params(**params): 14 | credentials = {} 15 | for key, value in params.items(): 16 | if key in ( 17 | "client_id", 18 | "client_secret", 19 | "client_email", 20 | "private_key", 21 | "access_token", 22 | "refresh_token", 23 | "identity", 24 | ): 25 | credentials[key] = value 26 | return credentials 27 | 28 | 29 | def from_environment(prefix=None, suffix=None, **params): 30 | keys = { 31 | "client_id": utils.affix(prefix, "GOOGLE_ANALYTICS_CLIENT_ID", suffix), 32 | "client_secret": utils.affix(prefix, "GOOGLE_ANALYTICS_CLIENT_SECRET", suffix), 33 | "refresh_token": utils.affix(prefix, "GOOGLE_ANALYTICS_REFRESH_TOKEN", suffix), 34 | } 35 | 36 | credentials = {} 37 | for credential, key in keys.items(): 38 | value = os.environ.get(key) 39 | if value: 40 | credentials[credential] = value 41 | 42 | return credentials 43 | 44 | 45 | def from_prompt(**params): 46 | prompted = {} 47 | 48 | if not params.get("identity"): 49 | prompted["identity"] = utils.input("Human-readable account name: ") 50 | if not params.get("client_id"): 51 | prompted["client_id"] = utils.input("Client ID: ") 52 | if not params.get("client_secret"): 53 | prompted["client_secret"] = utils.input("Client secret: ") 54 | 55 | return prompted 56 | 57 | 58 | class Credentials(object): 59 | STRATEGIES = { 60 | "params": from_params, 61 | "environment": from_environment, 62 | "prompt": from_prompt, 63 | } 64 | 65 | INTERACTIVE_STRATEGIES = ["params", "environment", "prompt"] 66 | UNSUPERVISED_STRATEGIES = ["params", "environment"] 67 | 68 | @classmethod 69 | def find(cls, interactive=False, valid=False, complete=False, **params): 70 | if interactive: 71 | strategies = copy(cls.INTERACTIVE_STRATEGIES) 72 | else: 73 | strategies = copy(cls.UNSUPERVISED_STRATEGIES) 74 | 75 | attempted = ", ".join(strategies) 76 | 77 | credentials = cls() 78 | while credentials.incomplete and len(strategies): 79 | strategy = strategies.pop(0) 80 | properties = cls.STRATEGIES[strategy](**params) or {} 81 | for key, value in properties.items(): 82 | if not getattr(credentials, key): 83 | setattr(credentials, key, value) 84 | if not params.get(key): 85 | params[key] = value 86 | 87 | # the environment variable suffix is often a good 88 | # descriptor of the nature of these credentials, 89 | # when lacking anything better 90 | if params.get("identity"): 91 | credentials.identity = params["identity"] 92 | elif params.get("suffix") and credentials.identity is credentials.client_id: 93 | credentials.identity = params.get("suffix") 94 | 95 | if complete and credentials.incomplete: 96 | raise KeyError( 97 | "Could not find client credentials and token. Tried {attempted}.".format( 98 | attempted=attempted 99 | ) 100 | ) 101 | elif valid and credentials.invalid: 102 | raise KeyError( 103 | "Could not find client id and client secret. 
Tried {attempted}.".format( 104 | attempted=attempted 105 | ) 106 | ) 107 | else: 108 | return credentials 109 | 110 | def __init__( 111 | self, 112 | client_id=None, 113 | client_secret=None, 114 | client_email=None, 115 | private_key=None, 116 | access_token=None, 117 | refresh_token=None, 118 | identity=None, 119 | ): 120 | self.client_id = client_id 121 | self.client_secret = client_secret 122 | self.client_email = client_email 123 | self.private_key = private_key 124 | self.access_token = access_token 125 | self.refresh_token = refresh_token 126 | self._identity = identity 127 | 128 | @property 129 | def token(self): 130 | return self.refresh_token or self.access_token 131 | 132 | @property 133 | def identity(self): 134 | return self._identity or self.client_id 135 | 136 | @identity.setter 137 | def identity(self, value): 138 | self._identity = value 139 | 140 | @property 141 | def type(self): 142 | if self.client_email and self.private_key: 143 | return 2 144 | elif self.client_id and self.client_secret: 145 | return 3 146 | else: 147 | return None 148 | 149 | @property 150 | def valid(self): 151 | """Valid credentials are not necessarily correct, but 152 | they contain all necessary information for an 153 | authentication attempt.""" 154 | two_legged = self.client_email and self.private_key 155 | three_legged = self.client_id and self.client_secret 156 | return two_legged or three_legged or False 157 | 158 | @property 159 | def invalid(self): 160 | return not self.valid 161 | 162 | @property 163 | def complete(self): 164 | """ Complete credentials are valid and are either two-legged or include a token. """ 165 | return self.valid and ( 166 | self.access_token or self.refresh_token or self.type == 2 167 | ) 168 | 169 | @property 170 | def incomplete(self): 171 | return not self.complete 172 | 173 | @property 174 | def oauth(self): 175 | if self.incomplete: 176 | return None 177 | else: 178 | if self.type == 2: 179 | return oauth2client.client.SignedJwtAssertionCredentials( 180 | service_account_name=self.client_email, 181 | private_key=self.private_key.encode("utf-8"), 182 | scope="https://www.googleapis.com/auth/analytics.readonly", 183 | ) 184 | else: 185 | return oauth2client.client.OAuth2Credentials( 186 | access_token=self.access_token, 187 | client_id=self.client_id, 188 | client_secret=self.client_secret, 189 | refresh_token=self.refresh_token, 190 | token_expiry=None, 191 | token_uri=oauth2client.GOOGLE_TOKEN_URI, 192 | user_agent=None, 193 | revoke_uri=oauth2client.GOOGLE_REVOKE_URI, 194 | id_token=None, 195 | token_response=None, 196 | ) 197 | 198 | def serialize(self): 199 | return { 200 | "identity": self.identity, 201 | "client_id": self.client_id, 202 | "client_secret": self.client_secret, 203 | "client_email": self.client_email, 204 | "private_key": self.private_key, 205 | "access_token": self.access_token, 206 | "refresh_token": self.refresh_token, 207 | } 208 | 209 | def authorize(self): 210 | return self.oauth.authorize(httplib2.Http()) 211 | 212 | def revoke(self): 213 | if not self.token: 214 | raise KeyError("Cannot revoke a token when no token was provided.") 215 | 216 | # `credentials.revoke` will try to revoke the refresh token even 217 | # if it's None, which will fail, so we have to miss with the innards 218 | # of oauth2client here a little bit 219 | return self.oauth._do_revoke(httplib2.Http().request, self.token) 220 | 221 | 222 | def normalize(fn): 223 | @inspector.changes(fn) 224 | def normalized_fn( 225 | client_id=None, 226 | client_secret=None, 227 | 
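# NOTE: the body below forwards these arguments to `Credentials(...)`
# positionally, but `Credentials.__init__` declares `client_email` and
# `private_key` ahead of `access_token`, so the token arguments land in
# the wrong parameters; passing explicit keywords would avoid this.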
access_token=None, 228 | refresh_token=None, 229 | identity=None, 230 | ): 231 | 232 | if isinstance(client_id, Credentials): 233 | credentials = client_id 234 | else: 235 | credentials = Credentials( 236 | client_id, client_secret, access_token, refresh_token, identity 237 | ) 238 | 239 | return fn(credentials) 240 | 241 | return normalized_fn 242 | -------------------------------------------------------------------------------- /src/forecastga/ga/auth/oauth.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | import json 4 | import webbrowser 5 | 6 | import addressable 7 | from oauth2client import client 8 | from apiclient import discovery 9 | 10 | from forecastga.ga import utils, account 11 | from .credentials import Credentials, normalize 12 | 13 | 14 | class Flow(client.OAuth2WebServerFlow): 15 | def __init__(self, client_id, client_secret, redirect_uri): 16 | super(Flow, self).__init__( 17 | client_id, 18 | client_secret, 19 | scope="https://www.googleapis.com/auth/analytics.readonly", 20 | redirect_uri=redirect_uri, 21 | ) 22 | 23 | def step2_exchange(self, code): 24 | credentials = super(Flow, self).step2_exchange(code) 25 | return Credentials.find(complete=True, **credentials.__dict__) 26 | 27 | 28 | # a simplified version of `oauth2client.tools.run_flow` 29 | def authorize(client_id, client_secret): 30 | flow = Flow(client_id, client_secret, redirect_uri="urn:ietf:wg:oauth:2.0:oob") 31 | 32 | authorize_url = flow.step1_get_authorize_url() 33 | print("Go to the following link in your browser: " + authorize_url) 34 | code = input("Enter verification code: ").strip() 35 | return flow.step2_exchange(code) 36 | 37 | 38 | @normalize 39 | def revoke(credentials): 40 | return credentials.revoke() 41 | 42 | 43 | @normalize 44 | def authenticate(credentials): 45 | client = credentials.authorize() 46 | service = discovery.build("analytics", "v3", http=client, cache_discovery=False) 47 | raw_accounts = service.management().accounts().list().execute()["items"] 48 | accounts = [account.Account(raw, service, credentials) for raw in raw_accounts] 49 | return addressable.List(accounts, indices=["id", "name"], insensitive=True) 50 | -------------------------------------------------------------------------------- /src/forecastga/ga/blueprint.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | from forecastga import ga 4 | 5 | 6 | class Blueprint: 7 | def __init__(self, description): 8 | self.raw = description 9 | self.scope = description.get("scope") 10 | self.defaults = description.get("defaults") 11 | self._identity = description.get("identity") 12 | self._queries = description.get("queries") 13 | 14 | @property 15 | def identity(self): 16 | data = self._identity 17 | if data: 18 | if isinstance(data, ga.utils.basestring): 19 | return dict(identity=data) 20 | if isinstance(data, dict): 21 | return data 22 | 23 | return None 24 | 25 | def queries(self, profile): 26 | base = ga.query.describe(profile, self.defaults) 27 | 28 | queries = [] 29 | for title, description in self._queries.items(): 30 | query = ga.query.refine(base, description) 31 | query.title = title 32 | queries.append(query) 33 | 34 | return queries 35 | -------------------------------------------------------------------------------- /src/forecastga/ga/columns.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | import functools 4 | import re 5 | 6 | 
import addressable 7 | 8 | 9 | from snakify import snakify 10 | 11 | from . import utils 12 | 13 | TYPES = { 14 | "STRING": utils.unicode, 15 | "INTEGER": int, 16 | "FLOAT": float, 17 | "PERCENT": float, 18 | "TIME": float, 19 | "CURRENCY": float, 20 | } 21 | 22 | DIMENSIONS = { 23 | "ga:date": lambda date: utils.date.parse(date).date(), 24 | "ga:dateHour": lambda date: utils.date.parse("{} {}".format(date[:8], date[8:])), 25 | } 26 | 27 | 28 | def escape_chars(value, chars=",;"): 29 | if value is True: 30 | return "Yes" 31 | if value is False: 32 | return "No" 33 | 34 | value = utils.unicode(value) 35 | for char in chars: 36 | value = value.replace(char, "\\" + char) 37 | 38 | return value 39 | 40 | 41 | def escape(method): 42 | @functools.wraps(method) 43 | def escaped_method(self, *values): 44 | values = utils.builtins.map(escape_chars, values) 45 | return method(self, *values) 46 | 47 | return escaped_method 48 | 49 | 50 | class Column: 51 | selectors = ( 52 | "eq", 53 | "neq", 54 | "lt", 55 | "lte", 56 | "gt", 57 | "gte", 58 | "between", 59 | "any", 60 | "contains", 61 | "ncontains", 62 | "re", 63 | "nre", 64 | ) 65 | 66 | @classmethod 67 | def from_metadata(cls, metadata): 68 | attributes = metadata["attributes"] 69 | data_format = ( 70 | DIMENSIONS.get(metadata["id"]) 71 | or TYPES.get(attributes["dataType"]) 72 | or utils.identity 73 | ) 74 | is_deprecated = attributes.get("status", "ACTIVE") == "DEPRECATED" 75 | is_allowed_in_segments = "allowedInSegments" in attributes 76 | column = Column( 77 | metadata["id"], 78 | column_type=attributes["type"].lower(), 79 | format=data_format, 80 | attributes=attributes, 81 | deprecated=is_deprecated, 82 | allowed_in_segments=is_allowed_in_segments, 83 | ) 84 | return column.expand() 85 | 86 | def __init__( 87 | self, 88 | column_id, 89 | column_type, 90 | format=utils.unicode, 91 | attributes={}, 92 | deprecated=False, 93 | allowed_in_segments=True, 94 | ): 95 | self.account = None 96 | self.id = column_id 97 | self.report_type, self.slug = self.id.split(":") 98 | index = re.search(r"\d{1,2}", self.slug) 99 | if index: 100 | self.index = int(index.group(0)) 101 | else: 102 | self.index = None 103 | self.python_slug = snakify(self.slug) 104 | self.attributes = attributes 105 | self.name = attributes.get("uiName", column_id).replace("XX", str(self.index)) 106 | self.group = attributes.get("group") 107 | self.description = attributes.get("description") 108 | self.type = column_type 109 | # TODO: evaluate if we can improve casting 110 | self.cast = format 111 | self.is_deprecated = deprecated 112 | self.is_allowed_in_segments = allowed_in_segments 113 | 114 | def link(self, account): 115 | self.account = account 116 | 117 | def expand(self): 118 | columns = [] 119 | if "XX" in self.id: 120 | min_index = int(self.attributes.get("minTemplateIndex", "1")) 121 | max_index = int(self.attributes.get("maxTemplateIndex", "20")) 122 | for i in range(min_index, max_index + 1): 123 | column = Column( 124 | self.id.replace("XX", str(i)), 125 | column_type=self.type, 126 | format=self.cast, 127 | attributes=self.attributes, 128 | deprecated=self.is_deprecated, 129 | allowed_in_segments=self.is_allowed_in_segments, 130 | ) 131 | columns.append(column) 132 | else: 133 | columns = [self] 134 | 135 | return columns 136 | 137 | @escape 138 | def eq(self, value): 139 | return "{id}=={value}".format(id=self.id, value=value) 140 | 141 | @escape 142 | def neq(self, value): 143 | return "{id}!={value}".format(id=self.id, value=value) 144 | 145 | @escape 146 | def 
lt(self, value): 147 | return "{id}<{value}".format(id=self.id, value=value) 148 | 149 | @escape 150 | def lte(self, value): 151 | return "{id}<={value}".format(id=self.id, value=value) 152 | 153 | @escape 154 | def gt(self, value): 155 | return "{id}>{value}".format(id=self.id, value=value) 156 | 157 | @escape 158 | def gte(self, value): 159 | return "{id}>={value}".format(id=self.id, value=value) 160 | 161 | @escape 162 | def between(self, a, b): 163 | return "{id}<>{a}_{b}".format(id=self.id, a=a, b=b) 164 | 165 | @escape 166 | def any(self, *values): 167 | return "{id}[]{values}".format(id=self.id, values="|".join(values)) 168 | 169 | @escape 170 | def contains(self, value): 171 | return "{id}=@{value}".format(id=self.id, value=value) 172 | 173 | @escape 174 | def ncontains(self, value): 175 | return "{id}!@{value}".format(id=self.id, value=value) 176 | 177 | @escape 178 | def re(self, value): 179 | return "{id}=~{value}".format(id=self.id, value=value) 180 | 181 | @escape 182 | def nre(self, value): 183 | return "{id}!~{value}".format(id=self.id, value=value) 184 | 185 | # useful when sorting a query 186 | def __neg__(self): 187 | return "-" + self.id 188 | 189 | def __repr__(self): 190 | report_types = { 191 | "ga": "Core", 192 | "rt": "Realtime", 193 | None: "Unbound", 194 | } 195 | return "<{query_type} {column_type}: {name} ({id})>".format( 196 | query_type=report_types[self.report_type], 197 | column_type=self.type.capitalize(), 198 | name=self.name, 199 | id=self.id, 200 | ) 201 | 202 | 203 | # see https://developers.google.com/analytics/devguides/reporting/core/v3/segments#reference 204 | class Segment(Column): 205 | # CHECK: do we need to call super here? 206 | def __init__(self, raw, account): 207 | self.raw = raw 208 | self.id = raw["segmentId"] 209 | self.report_type, self.slug = self.id.split("::") 210 | self.python_slug = snakify(self.slug) 211 | self.name = raw["name"] 212 | self.kind = raw["kind"].lower() 213 | self.definition = raw["definition"] 214 | 215 | def __repr__(self): 216 | return "<Segment: {name} ({id})>".format( 217 | **self.__dict__ 218 | ) 219 | 220 | 221 | class Filter: 222 | pass 223 | 224 | 225 | class Goal: 226 | pass 227 | 228 | """ 229 | goals = service.management().goals().list( 230 | accountId=firstAccountId, 231 | webPropertyId=firstWebpropertyId, 232 | profileId=firstProfileId).execute() 233 | """ 234 | 235 | 236 | class ColumnList(addressable.List): 237 | COLUMN_TYPE = Column 238 | 239 | def __init__(self, columns, **options): 240 | options["items"] = columns 241 | options["name"] = self.COLUMN_TYPE.__name__ # e.g. "Column" or "Segment" 242 | options["indices"] = ("name", "id", "slug", "python_slug") 243 | options["insensitive"] = True 244 | super(ColumnList, self).__init__(**options) 245 | 246 | @utils.vectorize 247 | def normalize(self, value): 248 | if isinstance(value, self.COLUMN_TYPE): 249 | return value 250 | 251 | return self[value] 252 | 253 | @utils.vectorize 254 | def serialize(self, value, greedy=True): 255 | """ 256 | Greedy serialization requires the value to either be a column 257 | or convertible to a column, whereas non-greedy serialization 258 | will pass through any string as-is and will only serialize 259 | Column objects. 260 | 261 | Non-greedy serialization is useful when preparing queries with 262 | custom filters or segments.
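For example (illustrative; assuming the list holds the `ga:pageviews` column): serialize("pageviews") returns "ga:pageviews", while serialize("ga:sessions>10", greedy=False) leaves the raw filter string untouched.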
263 | """ 264 | 265 | if greedy and not isinstance(value, Column): 266 | value = self.normalize(value) 267 | 268 | if isinstance(value, Column): 269 | return value.id 270 | 271 | return value 272 | 273 | 274 | class SegmentList(ColumnList): 275 | COLUMN_TYPE = Segment 276 | 277 | 278 | def is_deprecated(column): 279 | return column.is_deprecated 280 | 281 | 282 | def is_supported(column): 283 | return not column.is_deprecated 284 | 285 | 286 | def is_metric(column): 287 | return column.type == "metric" 288 | 289 | 290 | def is_dimension(column): 291 | return column.type == "dimension" 292 | 293 | 294 | def is_core(column): 295 | return column.report_type == "ga" 296 | 297 | 298 | def is_live(column): 299 | return column.report_type == "rt" 300 | -------------------------------------------------------------------------------- /src/forecastga/ga/errors.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | class GoogleAnalyticsError(Exception): 5 | pass 6 | 7 | 8 | class InvalidRequestError(GoogleAnalyticsError): 9 | # invalid parameter, bad request 10 | pass 11 | 12 | 13 | class NotPermittedError(GoogleAnalyticsError): 14 | # invalid credentials, no permission 15 | pass 16 | 17 | 18 | class LimitExceededError(GoogleAnalyticsError): 19 | # quota, rate limit, ... 20 | pass 21 | 22 | 23 | class ServerError(GoogleAnalyticsError): 24 | # internal server error / backend error 25 | pass 26 | -------------------------------------------------------------------------------- /src/forecastga/ga/realtime.yml: -------------------------------------------------------------------------------- 1 | - id: rt:activeUsers 2 | attributes: 3 | uiName: Active Users 4 | group: user 5 | description: The number of users interacting with the property right now. 6 | type: metric 7 | dataType: INTEGER 8 | - id: rt:goalXXValue 9 | attributes: 10 | uiName: Goal XX Value 11 | group: goal conversions 12 | description: | 13 | The total numeric value for the requested goal number, where XX is a number between 1 and 20. 14 | type: metric 15 | dataType: CURRENCY 16 | - id: rt:goalValueAll 17 | attributes: 18 | uiName: Goal Value 19 | group: goal conversions 20 | description: | 21 | The total numeric value for all goals defined for your view (profile). 22 | type: metric 23 | dataType: CURRENCY 24 | - id: rt:goalXXCompletions 25 | attributes: 26 | uiName: Goal XX Completions 27 | group: goal conversions 28 | description: | 29 | The total number of completions for the requested goal number, where XX is a number between 1 and 20. 30 | type: metric 31 | dataType: INTEGER 32 | - id: rt:goalCompletionsAll 33 | attributes: 34 | uiName: Goal Completions 35 | group: goal conversions 36 | description: | 37 | The total number of completions for all goals defined for your view (profile). 38 | type: metric 39 | dataType: INTEGER 40 | - id: rt:pageviews 41 | attributes: 42 | uiName: Pageviews 43 | group: page tracking 44 | description: | 45 | The total number of page views. 46 | type: metric 47 | dataType: INTEGER 48 | - id: rt:screenViews 49 | attributes: 50 | uiName: Screen Views 51 | group: app tracking 52 | description: | 53 | The total number of screen views. 54 | type: metric 55 | dataType: INTEGER 56 | - id: rt:totalEvents 57 | attributes: 58 | uiName: Total Events 59 | group: event tracking 60 | description: | 61 | The total number of events for the view (profile), across all categories. 
62 | type: metric 63 | dataType: INTEGER 64 | - id: rt:userType 65 | attributes: 66 | uiName: User Type 67 | group: user 68 | description: A boolean indicating if a user is new or returning. Possible values are `new` and `returning`. 69 | type: dimension 70 | dataType: STRING 71 | - id: rt:minutesAgo 72 | attributes: 73 | uiName: Minutes Ago 74 | group: time 75 | description: The number of minutes ago a hit occurred. 76 | type: dimension 77 | dataType: INTEGER 78 | - id: rt:referralPath 79 | attributes: 80 | uiName: Referral Path 81 | group: traffic sources 82 | description: | 83 | The path of the referring URL (e.g. document.referrer). If someone places a link to your property on their website, this element contains the path of the page that contains the referring link. This value is only set when `rt:medium=referral`. 84 | type: dimension 85 | dataType: STRING 86 | - id: rt:campaign 87 | attributes: 88 | uiName: Campaign 89 | group: traffic sources 90 | description: | 91 | When using manual campaign tracking, the value of the `utm_campaign` campaign tracking parameter. When using AdWords autotagging, the name(s) of the online ad campaign that you use for your property. Otherwise the value `(not set)` is used. 92 | type: dimension 93 | dataType: STRING 94 | - id: rt:source 95 | attributes: 96 | uiName: Source 97 | group: traffic sources 98 | description: | 99 | The source of referrals to your property. When using manual campaign tracking, the value of the `utm_source` campaign tracking parameter. When using AdWords autotagging, the value is `google`. Otherwise the domain of the source referring the user to your property (e.g. `document.referrer`). The value may also contain a port address. If the user arrived without a referrer, the value is `(direct)`. 100 | type: dimension 101 | dataType: STRING 102 | - id: rt:medium 103 | attributes: 104 | uiName: Medium 105 | group: traffic sources 106 | description: | 107 | The type of referrals to your property. When using manual campaign tracking, the value of the `utm_medium` campaign tracking parameter. When using AdWords autotagging, the value is `ppc`. If the user comes from a search engine detected by Google Analytics, the value is `organic`. If the referrer is not a search engine, the value is `referral`. If the user came directly to the property, and `document.referrer` is empty, the value is `(direct)`. 108 | type: dimension 109 | dataType: STRING 110 | - id: rt:trafficType 111 | attributes: 112 | uiName: Traffic Type 113 | group: traffic sources 114 | description: | 115 | This dimension is similar to `rt:medium` for constant values such as `organic`, `referral`, `direct`, etc. It is different for custom referral types. As an example, if you add the `utm_campaign` parameter to your URL with value *email*, `rt:medium` will be *email* but `rt:trafficType` will be *custom*. 116 | type: dimension 117 | dataType: STRING 118 | - id: rt:keyword 119 | attributes: 120 | uiName: Keyword 121 | group: traffic sources 122 | description: | 123 | When using manual campaign tracking, the value of the `utm_term` campaign tracking parameter. When using AdWords autotagging or if a user used organic search to reach your property, the keywords used by users to reach your property. Otherwise the value is `(not set)`. 124 | type: dimension 125 | dataType: STRING 126 | - id: rt:goalId 127 | attributes: 128 | uiName: Goal ID 129 | group: goal conversions 130 | description: A string. Corresponds to the goal ID. 
131 | type: dimension 132 | dataType: STRING 133 | - id: rt:browser 134 | attributes: 135 | uiName: Browser 136 | group: platform / device 137 | description: The names of browsers used by users to your property. 138 | type: dimension 139 | dataType: STRING 140 | - id: rt:browserVersion 141 | attributes: 142 | uiName: Browser Version 143 | group: platform / device 144 | description: The browser versions used by users to your property. 145 | type: dimension 146 | dataType: STRING 147 | - id: rt:operatingSystem 148 | attributes: 149 | uiName: Operating System 150 | group: platform / device 151 | description: The operating system used by users to your property. 152 | type: dimension 153 | dataType: STRING 154 | - id: rt:operatingSystemVersion 155 | attributes: 156 | uiName: Operating System Version 157 | group: platform / device 158 | description: The version of the operating system used by users to your property 159 | type: dimension 160 | dataType: STRING 161 | - id: rt:deviceCategory 162 | attributes: 163 | uiName: Device Category 164 | group: platform / device 165 | description: | 166 | The type of device: `Desktop`, `Tablet`, or `Mobile`. 167 | type: dimension 168 | dataType: STRING 169 | - id: rt:mobileDeviceBranding 170 | attributes: 171 | uiName: Mobile Device Branding 172 | group: platform / device 173 | description: | 174 | Mobile manufacturer or branded name (e.g: Samsung, HTC, Verizon, T-Mobile). 175 | type: dimension 176 | dataType: STRING 177 | - id: rt:mobileDeviceModel 178 | attributes: 179 | uiName: Mobile Device Model 180 | group: platform / device 181 | description: | 182 | Mobile device model (e.g.: Nexus S) 183 | type: dimension 184 | dataType: STRING 185 | - id: rt:country 186 | attributes: 187 | uiName: Country 188 | group: geo 189 | description: | 190 | The countries of website users, derived from IP addresses. 191 | type: dimension 192 | dataType: STRING 193 | - id: rt:region 194 | attributes: 195 | uiName: Region 196 | group: geo 197 | description: | 198 | The region of users to your property, derived from IP addresses. In the U.S., a region is a state, such as `New York`. 199 | type: dimension 200 | dataType: STRING 201 | - id: rt:city 202 | attributes: 203 | uiName: City 204 | group: geo 205 | description: | 206 | The cities of users, derived from IP addresses. 207 | type: dimension 208 | dataType: STRING 209 | - id: rt:latitude 210 | attributes: 211 | uiName: Latitude 212 | group: geo 213 | description: | 214 | The approximate latitude of the user's city. Derived from IP address. Locations north of the equator are represented by positive values and locations south of the equator by negative values. 215 | type: dimension 216 | dataType: STRING 217 | - id: rt:longitude 218 | attributes: 219 | uiName: Longitude 220 | group: geo 221 | description: | 222 | The approximate longitude of the user's city. Derived from IP address. Locations east of the prime meridian are represented by positive values and locations west of the prime meridian by negative values. 223 | type: dimension 224 | dataType: STRING 225 | - id: rt:pagePath 226 | attributes: 227 | uiName: Page Path 228 | group: page tracking 229 | description: | 230 | A page on your property specified by path and/or query parameters. 231 | type: dimension 232 | dataType: STRING 233 | - id: rt:pageTitle 234 | attributes: 235 | uiName: Page Title 236 | group: page tracking 237 | description: | 238 | The title of a page. Keep in mind that multiple pages might have the same page title. 
239 | type: dimension 240 | dataType: STRING 241 | - id: rt:appName 242 | attributes: 243 | uiName: App Name 244 | group: app tracking 245 | description: | 246 | The name of the application. 247 | type: dimension 248 | dataType: STRING 249 | - id: rt:appVersion 250 | attributes: 251 | uiName: App Version 252 | group: app tracking 253 | description: | 254 | The version of the application. 255 | type: dimension 256 | dataType: STRING 257 | - id: rt:screenName 258 | attributes: 259 | uiName: Screen Name 260 | group: app tracking 261 | description: | 262 | The name of a screen. 263 | type: dimension 264 | dataType: STRING 265 | - id: rt:eventAction 266 | attributes: 267 | uiName: Event Action 268 | group: event tracking 269 | description: | 270 | The action of the event. 271 | type: dimension 272 | dataType: STRING 273 | - id: rt:eventCategory 274 | attributes: 275 | uiName: Event Category 276 | group: event tracking 277 | description: | 278 | The category of the event. 279 | type: dimension 280 | dataType: STRING 281 | - id: rt:eventLabel 282 | attributes: 283 | uiName: Event Label 284 | group: event tracking 285 | description: | 286 | The label of the event. 287 | type: dimension 288 | dataType: STRING 289 | -------------------------------------------------------------------------------- /src/forecastga/ga/segments.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | """ 4 | Chaining for filters and segments. 5 | """ 6 | 7 | 8 | def condition(value): 9 | return "condition::" + value 10 | 11 | 12 | def sequence(value): 13 | return "sequence::" + value 14 | 15 | 16 | def all(*values): 17 | return condition(";".join(values)) 18 | 19 | 20 | def any(*values): 21 | return condition(",".join(values)) 22 | 23 | 24 | def followed_by(*values): 25 | return sequence(";->>".join(values)) 26 | 27 | 28 | def immediately_followed_by(*values): 29 | return sequence(";->".join(values)) 30 | -------------------------------------------------------------------------------- /src/forecastga/ga/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | import os 4 | import copy 5 | import operator 6 | import functools 7 | 8 | from . 
import date 9 | from .functional import memoize, immutable, identity, soak, vectorize 10 | from .server import single_serve 11 | from .string import format, affix, paste, cut 12 | 13 | 14 | # Python 2 and 3 compatibility 15 | try: 16 | basestring = basestring 17 | unicode = unicode 18 | input = raw_input 19 | except NameError: 20 | basestring = str 21 | unicode = str 22 | input = input 23 | 24 | try: 25 | import __builtin__ as builtins 26 | from StringIO import StringIO 27 | except ImportError: 28 | import builtins 29 | from io import StringIO 30 | 31 | 32 | # return a path relative to the package root 33 | def here(*segments): 34 | current = os.path.dirname(__file__) 35 | return os.path.realpath(os.path.join(current, "..", *segments)) 36 | 37 | 38 | # flatten nested lists 39 | def flatten(nested_list): 40 | return functools.reduce(operator.add, nested_list) 41 | 42 | 43 | # wrap scalars into a list 44 | def wrap(obj): 45 | if isinstance(obj, list): 46 | return obj 47 | 48 | return [obj] 49 | 50 | 51 | # substitute new dictionary keys 52 | def translate(d, mapping): 53 | d = copy.copy(d) 54 | 55 | for src, dest in mapping.items(): 56 | if src in d: 57 | d[dest] = d[src] 58 | del d[src] 59 | 60 | return d 61 | 62 | 63 | # retain only whitelisted keys in a dictionary 64 | def whitelist(d, allowed): 65 | return {k: v for k, v in d.items() if k in allowed} 66 | 67 | 68 | # similar to whitelist, but ordered and returns only values, not keys 69 | def pick(obj, allowed): 70 | if isinstance(obj, dict): 71 | get = lambda key: obj[key] 72 | else: 73 | get = lambda key: getattr(obj, key) 74 | 75 | values = [] 76 | for key in allowed: 77 | values.append(get(key)) 78 | 79 | return values 80 | 81 | 82 | # test if an object is falsy or contains only falsy values 83 | def isempty(obj): 84 | if isinstance(obj, list): 85 | return not len(list(filter(None, obj))) > 0 86 | if isinstance(obj, dict): 87 | return not len(obj) > 0 88 | 89 | return not obj 90 | -------------------------------------------------------------------------------- /src/forecastga/ga/utils/date.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import re 3 | 4 | from dateutil.parser import parse 5 | from dateutil.relativedelta import relativedelta 6 | 7 | 8 | # Python 2 and 3 compatibility 9 | try: 10 | basestring = basestring 11 | except NameError: 12 | basestring = str 13 | 14 | 15 | def serialize(value): 16 | if isinstance(value, datetime.date): 17 | return value.isoformat() 18 | else: 19 | return value 20 | 21 | 22 | def extract(obj): 23 | if isinstance(obj, datetime.date): 24 | if hasattr(obj, "date"): 25 | return obj.date() 26 | else: 27 | return obj 28 | else: 29 | raise ValueError( 30 | "Can only extract date for type: date, datetime. 
Received: {}".format(obj) 31 | ) 32 | 33 | 34 | def parse_description(s): 35 | today = datetime.date.today() 36 | if s == "today": 37 | return today 38 | elif s == "yesterday": 39 | return today - relativedelta(days=1) 40 | else: 41 | match = re.match(r"(\d+)daysAgo", s) 42 | if match: 43 | return today - relativedelta(days=int(match.group(1))) 44 | else: 45 | raise ValueError( 46 | "Can only parse descriptions of the format: today, yesterday, ndaysAgo" 47 | ) 48 | 49 | 50 | def normalize(obj): 51 | if obj is None: 52 | return None 53 | elif isinstance(obj, datetime.date): 54 | return extract(obj) 55 | elif isinstance(obj, basestring): 56 | try: 57 | return extract(parse(obj)) 58 | except ValueError: 59 | try: 60 | return extract(parse_description(obj)) 61 | except ValueError: 62 | raise ValueError("Cannot parse date or description: " + obj) 63 | else: 64 | raise ValueError( 65 | "Can only normalize dates of type: date, datetime, basestring." 66 | ) 67 | 68 | 69 | def range(start=None, stop=None, months=0, days=0): 70 | yesterday = datetime.date.today() - relativedelta(days=1) 71 | start = normalize(start) or yesterday 72 | stop = normalize(stop) 73 | is_past = days < 0 or months < 0 74 | 75 | if days or months: 76 | if start and stop: 77 | raise Exception( 78 | "A daterange cannot be defined using stop alongside months or days." 79 | ) 80 | else: 81 | if is_past: 82 | days = days + 1 83 | else: 84 | days = days - 1 85 | 86 | delta = relativedelta(days=days, months=months) 87 | 88 | stop = start + delta 89 | 90 | stop = stop or start 91 | return map(serialize, sorted((start, stop))) 92 | 93 | 94 | def is_relative(datestring): 95 | return "-" not in datestring 96 | -------------------------------------------------------------------------------- /src/forecastga/ga/utils/functional.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | import functools 4 | import inspector # third-party signature-preserving decorators (not the stdlib `inspect`) 5 | 6 | 7 | class memoize: 8 | def __init__(self, function): 9 | self.function = function 10 | self.memoized = {} 11 | 12 | def __call__(self, *args): 13 | try: 14 | return self.memoized[args] 15 | except KeyError: 16 | self.memoized[args] = self.function(*args) 17 | return self.memoized[args] 18 | 19 | 20 | def vectorize(fn): 21 | """ 22 | Allows a method to accept one or more values, 23 | but internally deal only with a single item, 24 | and return a list or a single item, depending 25 | on what is desired.
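Sketch of the resulting call patterns (the `normalize` name here is hypothetical): a decorated normalize("pageviews") returns a single item, normalize(["pageviews", "sessions"]) returns a list, and normalize("pageviews", wrap=True) forces the single result back into a one-item list.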
26 | """ 27 | 28 | @functools.wraps(fn) 29 | def vectorized_method(self, values, *vargs, **kwargs): 30 | wrap = not isinstance(values, (list, tuple)) 31 | should_unwrap = not kwargs.setdefault("wrap", False) 32 | unwrap = wrap and should_unwrap 33 | del kwargs["wrap"] 34 | 35 | if wrap: 36 | values = [values] 37 | 38 | results = [fn(self, value, *vargs, **kwargs) for value in values] 39 | 40 | if unwrap: 41 | results = results[0] 42 | 43 | return results 44 | 45 | return vectorized_method 46 | 47 | 48 | def immutable(method): 49 | @inspector.wraps(method) 50 | def wrapped_method(self, *vargs, **kwargs): 51 | obj = self.clone() 52 | method(obj, *vargs, **kwargs) 53 | return obj 54 | 55 | return wrapped_method 56 | 57 | 58 | def identity(value): 59 | return value 60 | 61 | 62 | def soak(*vargs, **kwargs): 63 | pass 64 | -------------------------------------------------------------------------------- /src/forecastga/ga/utils/server.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | def single_serve(message=None, port=5000): 5 | import logging 6 | from werkzeug.wrappers import Request, Response 7 | from werkzeug.serving import run_simple 8 | 9 | log = logging.getLogger("werkzeug") 10 | log.setLevel(logging.ERROR) 11 | 12 | captured = {} 13 | 14 | def application(environ, start_response): 15 | request = Request(environ) 16 | request.environ.get("werkzeug.server.shutdown")() 17 | captured.update(dict(request.args.items())) 18 | if message: 19 | print(message) 20 | response = Response(message, mimetype="text/plain") 21 | return response(environ, start_response) 22 | 23 | run_simple("localhost", port, application) 24 | return captured 25 | -------------------------------------------------------------------------------- /src/forecastga/ga/utils/string.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | 3 | 4 | # Python 2 and 3 compatibility 5 | try: 6 | unicode = unicode 7 | except NameError: 8 | unicode = str 9 | 10 | 11 | def format(string, **kwargs): 12 | return textwrap.dedent(string).format(**kwargs).strip() 13 | 14 | 15 | def affix(prefix, base, suffix, connector="_"): 16 | if prefix: 17 | prefix = prefix + connector 18 | else: 19 | prefix = "" 20 | 21 | if suffix: 22 | suffix = connector + suffix 23 | else: 24 | suffix = "" 25 | 26 | return prefix + base + suffix 27 | 28 | 29 | # a supercharged `join` function, analogous to `paste` in the R language 30 | def paste(rows, *delimiters): 31 | delimiter = delimiters[-1] 32 | delimiters = delimiters[:-1] 33 | 34 | if len(delimiters): 35 | return paste([paste(row, *delimiters) for row in rows], delimiter) 36 | else: 37 | return delimiter.join(map(unicode, rows)) 38 | 39 | 40 | # a supercharged `split` function, the inverse of `paste` 41 | def cut(s, *delimiters): 42 | delimiter = delimiters[-1] 43 | delimiters = delimiters[:-1] 44 | 45 | if len(delimiters): 46 | return [cut(ss, *delimiters) for ss in cut(s, delimiter)] 47 | else: 48 | return s.split(delimiter) 49 | -------------------------------------------------------------------------------- /src/forecastga/helpers/colab.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | """ForecastGA: Colab Utils""" 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | def plot_colab(df, title=None, dark_mode=False): 10 | 11 | from IPython.display import display, Markdown as md 12 | 13 | plt.style.use("default") 14 | 15 | def show_md(txt): 16 | display(md(txt)) 17 | 18 | if dark_mode: 19 | # Good all around color library 20 | plt.style.use("seaborn-colorblind") 21 | plt.rcParams.update( 22 | { 23 | "lines.color": "#565555", 24 | "legend.edgecolor": "#818080", 25 | "legend.borderpad": 0.6, 26 | "text.color": "white", 27 | "axes.facecolor": "#383838", 28 | "axes.edgecolor": "#565555", 29 | "axes.grid": True, 30 | "axes.labelcolor": "white", 31 | "grid.color": "#565555", 32 | "xtick.color": "white", 33 | "ytick.color": "white", 34 | "figure.facecolor": "#383838", 35 | "savefig.facecolor": "white", 36 | "savefig.edgecolor": "white", 37 | "font.sans-serif": "Liberation Sans", 38 | "lines.linewidth": 2, 39 | "figure.figsize": [15, 10], 40 | "font.size": 16, 41 | } 42 | ) 43 | 44 | else: 45 | plt.style.use("seaborn-colorblind") 46 | plt.rcParams.update( 47 | { 48 | "legend.borderpad": 0.6, 49 | "axes.grid": True, 50 | "font.sans-serif": "Liberation Sans", 51 | "lines.linewidth": 2, 52 | "figure.figsize": [15, 10], 53 | "font.size": 16, 54 | } 55 | ) 56 | 57 | if title: 58 | show_md("## {}".format(title)) 59 | 60 | df.plot() 61 | plt.show() 62 | -------------------------------------------------------------------------------- /src/forecastga/helpers/data.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # coding: utf-8 3 | # 4 | 5 | """ForecastGA: Data Helpers""" 6 | 7 | import pandas as pd 8 | import numpy as np 9 | 10 | from seasonal.periodogram import periodogram 11 | from scipy.signal import find_peaks 12 | from sklearn.model_selection import train_test_split as tts 13 | 14 | from forecastga.helpers.ssa import mySSA 15 | from forecastga.helpers.logging import get_logger 16 | 17 | _LOG = get_logger(__name__) 18 | 19 | from forecastga.models import MODELS 20 | 21 | 22 | def print_model_info(): 23 | _ = [ 24 | print(v["name"], ":", v["description"]) 25 | for k, v in MODELS.items() 26 | if v["status"] == "active" 27 | ] 28 | 29 | 30 | def constant_feature_detect(data, threshold=0.98): 31 | """Detect features that show the same value for the 32 | majority/all of the observations (constant/quasi-constant features) 33 | 34 | Parameters 35 | ---------- 36 | data : pd.DataFrame 37 | threshold : threshold to identify the variable as constant 38 | 39 | Returns 40 | ------- 41 | list of variable names 42 | """ 43 | 44 | data_copy = data.copy(deep=True) 45 | quasi_constant_feature = [] 46 | for feature in data_copy.columns: 47 | predominant = ( 48 | (data_copy[feature].value_counts() / float(len(data_copy))) 49 | .sort_values(ascending=False) 50 | .values[0] 51 | ) 52 | if predominant >= threshold: 53 | quasi_constant_feature.append(feature) 54 | _LOG.info("%s variables are found to be almost constant", len(quasi_constant_feature)) 55 | return quasi_constant_feature 56 | 57 | 58 | # More Diverse Selection For TBAT 59 | def infer_seasonality_ssa(train, index=1): 60 | ssa = mySSA(train) 61 | ssa.embed(embedding_dimension=36, verbose=False) 62 | ssa.decompose(True) 63 | rec = ssa.view_reconstruction( 64 | ssa.Xs[index], names="Seasonality", return_df=True, plot=False 65 | ) 66 | peaks, _ = find_peaks( 67 | rec.values.reshape( 68 | len(rec), 69 | ), 70 | height=0, 71 | ) 72 | peak_diffs =
[j - i for i, j in zip(peaks[:-1], peaks[1:])] 73 | seasonality = max(peak_diffs, key=peak_diffs.count) 74 | return seasonality 75 | 76 | 77 | # Good First Selection 78 | def infer_seasonality(train, index=0): # skip the first one, normally 79 | interval, power = periodogram(train, min_period=4, max_period=None) 80 | try: 81 | season = int( 82 | pd.DataFrame([interval, power]) 83 | .T.sort_values(1, ascending=False) 84 | .iloc[0, index] 85 | ) 86 | except Exception: 87 | _LOG.warning("Welch Season failed, defaulting to SSA solution") 88 | season = int(infer_seasonality_ssa(train, index=1)) 89 | return season 90 | 91 | 92 | def infer_periodocity(train): # NOTE: "periodocity" (sic) -- name kept because other modules reference it 93 | perd = pd.infer_freq(train.index) 94 | if perd in ["MS", "M", "BM", "BMS"]: 95 | periodocity = 12 96 | elif perd in ["BH", "H"]: 97 | periodocity = 24 98 | elif perd == "B": 99 | periodocity = 5 100 | elif perd == "D": 101 | periodocity = 7 102 | elif perd in ["W", "W-SUN", "W-MON", "W-TUE", "W-WED", "W-THU", "W-FRI", "W-SAT"]: 103 | periodocity = 52 104 | elif perd in ["Q", "QS", "BQ", "BQS"]: 105 | periodocity = 4 106 | elif perd in ["A", "BA", "AS", "BAS"]: 107 | periodocity = 10 108 | elif perd in ["T", "min"]: 109 | periodocity = 60 110 | elif perd == "S": 111 | periodocity = 60 112 | elif perd in ["L", "ms"]: 113 | periodocity = 1000 114 | elif perd in ["U", "us"]: 115 | periodocity = 1000 116 | elif perd == "N": 117 | periodocity = 1000 118 | else: raise ValueError("Could not infer periodicity from frequency: {}".format(perd)) 119 | return periodocity 120 | 121 | 122 | def select_seasonality(train, season): 123 | if season == "periodocity": 124 | return infer_periodocity(train) 125 | if season == "infer_from_data": 126 | return infer_seasonality(train) 127 | raise ValueError("season must be 'periodocity' or 'infer_from_data'") 128 | 129 | 130 | def add_freq(idx, freq=None): 131 | """Add a frequency attribute to idx, through inference or directly. 132 | 133 | Returns a copy. If `freq` is None, it is inferred. 134 | """ 135 | idx = idx.copy() 136 | if freq is None: 137 | if idx.freq is None: 138 | freq = pd.infer_freq(idx) 139 | else: 140 | return idx 141 | idx.freq = pd.tseries.frequencies.to_offset(freq) 142 | if idx.freq is None: 143 | raise AttributeError( 144 | "no discernible frequency found in `idx`. Specify" 145 | " a frequency string with `freq`." 146 | ) 147 | return idx 148 | 149 | 150 | def parse_data(df): 151 | if isinstance(df, pd.DataFrame): 152 | if df.shape[1] > 1: 153 | raise ValueError("The dataframe should only contain one target column") 154 | elif isinstance(df, pd.Series): 155 | df = df.to_frame() 156 | else: 157 | raise TypeError( 158 | "Please supply a pandas dataframe with one column or a pandas series" 159 | ) 160 | try: 161 | df.index.date 162 | except AttributeError: 163 | raise TypeError("The index should be a datetime type") 164 | 165 | if df.isnull().any().values[0]: 166 | raise ValueError( 167 | "The dataframe cannot have any null values, please interpolate" 168 | ) 169 | try: 170 | df.columns = ["Target"] 171 | except Exception: 172 | raise ValueError("There should only be one column") 173 | 174 | df.index = df.index.rename("Date") 175 | df.index = add_freq(df.index) 176 | 177 | _LOG.info( 178 | "The data has been successfully parsed by inferring a frequency, and establishing a 'Date' index and 'Target' column."
179 | ) 180 | 181 | return df, pd.infer_freq(df.index) 182 | 183 | 184 | def train_test_split(df, forecast_len=30): 185 | 186 | train, test = tts( 187 | df["Target"], test_size=forecast_len, shuffle=False, stratify=None 188 | ) 189 | _LOG.info( 190 | "An in-sample split of training size {} and testing size {} has been constructed".format( 191 | len(train), len(test) 192 | ) 193 | ) 194 | return train, test 195 | 196 | 197 | def season_list(train): 198 | lista = [] 199 | for i in range(1, 16): 200 | # SSA component indices 1 through 15 201 | lista.append(infer_seasonality_ssa(train, i)) 202 | return lista 203 | 204 | 205 | def get_unique_N(iterable, N): 206 | """Yields (in order) the first N unique elements of iterable. 207 | Might yield fewer if the data is too short.""" 208 | seen = set() 209 | for e in iterable: 210 | if e in seen: 211 | continue 212 | seen.add(e) 213 | yield e 214 | if len(seen) == N: 215 | _LOG.info( 216 | "The following set of plausible SSA seasonalities have been identified: {}".format( 217 | seen 218 | ) 219 | ) 220 | return 221 | 222 | 223 | # simple batcher. 224 | def data_generator(x_full, y_full, bs): 225 | def split(arr, size): 226 | arrays = [] 227 | while len(arr) > size: 228 | slice_ = arr[:size] 229 | arrays.append(slice_) 230 | arr = arr[size:] 231 | arrays.append(arr) 232 | return arrays 233 | 234 | while True: 235 | for rr in split((x_full, y_full), bs): 236 | yield rr 237 | -------------------------------------------------------------------------------- /src/forecastga/helpers/ga_data.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # coding: utf-8 3 | 4 | """ForecastGA: Google Analytics Helper Functions""" 5 | import json 6 | import os 7 | from types import SimpleNamespace 8 | from datetime import datetime 9 | import pandas as pd 10 | import numpy as np 11 | 12 | from forecastga.helpers.logging import get_logger 13 | 14 | from forecastga import ga 15 | 16 | _LOG = get_logger(__name__) 17 | 18 | 19 | def load_identity(data=None): 20 | 21 | if data: 22 | jf = { 23 | k.lower(): v 24 | for k, v in data.items() 25 | if k.lower() in ["client_id", "client_secret", "identity"] 26 | } 27 | return SimpleNamespace(**jf) 28 | 29 | if not os.path.isfile("identity.json"): 30 | raise FileNotFoundError( 31 | "A JSON file named `identity.json` must be accessible with your API credentials." 32 | ) 33 | 34 | with open("identity.json") as f: 35 | jf = json.load(f) 36 | identify_json = SimpleNamespace(**jf) 37 | 38 | return identify_json 39 | 40 | 41 | def load_profile(ga_url, identify_ns): 42 | 43 | try: 44 | profile = ga.authenticate( 45 | client_id=identify_ns.client_id, 46 | client_secret=identify_ns.client_secret, 47 | identity=identify_ns.identity, 48 | ga_url=ga_url, 49 | interactive=True, 50 | ) 51 | _LOG.info("Authenticated") 52 | return profile 53 | 54 | except Exception as e: 55 | _LOG.error("An error occurred: " + str(e)) 56 | return None 57 | 58 | 59 | def p_date(_dt): 60 | return datetime.strftime(_dt, "%Y-%m-%d") 61 | 62 | 63 | def get_ga_data(data): 64 | 65 | if "client_id" in data and "client_secret" in data and "identity" in data: 66 | identify_ns = load_identity(data) 67 | else: 68 | identify_ns = load_identity() 69 | 70 | if "ga_url" not in data: 71 | raise AttributeError( 72 | "You must provide the URL for your Google Analytics property."
73 | ) 74 | 75 | profile = load_profile(data["ga_url"], identify_ns) 76 | 77 | if profile is None: 78 | return None 79 | 80 | data = SimpleNamespace(**data) 81 | 82 | try: 83 | print( 84 | "Pulling data from {} to {}.".format(data.ga_start_date, data.ga_end_date) 85 | ) 86 | sessions = ( 87 | profile.core.query.metrics(data.ga_metric) 88 | .segment(data.ga_segment) 89 | .daily(data.ga_start_date, data.ga_end_date) 90 | .report 91 | ) 92 | 93 | except Exception as e: 94 | _LOG.error("Error retrieving data from Google Analytics: %s", str(e)) 95 | return None 96 | 97 | df = sessions.as_dataframe() 98 | 99 | df["date"] = pd.to_datetime(df["date"]) 100 | 101 | # Clean data. 102 | if data.omit_values_over and int(data.omit_values_over) > 0: 103 | df.loc[df[data.ga_metric] > data.omit_values_over, data.ga_metric] = np.nan 104 | 105 | df.loc[df[data.ga_metric] < 1, data.ga_metric] = np.nan 106 | 107 | df.dropna(inplace=True, axis=0) 108 | 109 | _LOG.info( 110 | "Rows: {rows} Min Date: {min_date} Max Date: {max_date}".format( 111 | rows=len(df), min_date=p_date(df.date.min()), max_date=p_date(df.date.max()) 112 | ) 113 | ) 114 | # Backfilling missing values 115 | df = df.set_index("date").asfreq("d", method="bfill") 116 | 117 | return df[data.ga_metric] 118 | -------------------------------------------------------------------------------- /src/forecastga/helpers/holidays.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # coding: utf-8 3 | # 4 | 5 | """ForecastGA: Holiday Helpers""" 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | from __future__ import unicode_literals 11 | 12 | import inspect 13 | import unicodedata 14 | 15 | import pandas as pd 16 | import numpy as np 17 | 18 | import holidays as hdays_part1 19 | import fbprophet.hdays as hdays_part2 20 | from fbprophet.make_holidays import make_holidays_df 21 | 22 | 23 | def utf8_to_ascii(text): 24 | """Holidays often have utf-8 characters. These are not allowed in R 25 | package data (they generate a NOTE). 26 | TODO: revisit whether we want to do this lossy conversion.
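For example, "Fête du Travail" comes back as "Fete du Travail", while a name written entirely in a non-Latin script decomposes to nothing printable and is returned as "FAILED_TO_PARSE".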
27 | """ 28 | ascii_text = ( 29 | unicodedata.normalize("NFD", text) 30 | .encode("ascii", "ignore") 31 | .decode("ascii") 32 | .strip() 33 | ) 34 | # Check if anything converted 35 | if sum(1 for x in ascii_text if x not in [" ", "(", ")", ","]) == 0: 36 | return "FAILED_TO_PARSE" 37 | else: 38 | return ascii_text 39 | 40 | 41 | def generate_holidays_file(): 42 | """Generate csv file of all possible holiday names, ds, 43 | and countries, year combination 44 | """ 45 | year_list = np.arange(1995, 2045, 1).tolist() 46 | all_holidays = [] 47 | # class names in holiday packages which are not countries 48 | # Also cut out countries with utf-8 holidays that don't parse to ascii 49 | class_to_exclude = set(["rd", "BY", "BG", "JP", "RS", "UA", "KR"]) 50 | 51 | class_list2 = inspect.getmembers(hdays_part2, inspect.isclass) 52 | country_set = set([name for name in list(zip(*class_list2))[0] if len(name) == 2]) 53 | class_list1 = inspect.getmembers(hdays_part1, inspect.isclass) 54 | country_set1 = set([name for name in list(zip(*class_list1))[0] if len(name) == 2]) 55 | country_set.update(country_set1) 56 | country_set -= class_to_exclude 57 | 58 | for country in country_set: 59 | df = make_holidays_df(year_list=year_list, country=country) 60 | df["country"] = country 61 | all_holidays.append(df) 62 | 63 | generated_holidays = pd.concat(all_holidays, axis=0, ignore_index=True) 64 | generated_holidays["year"] = generated_holidays.ds.apply(lambda x: x.year) 65 | generated_holidays.sort_values(["country", "ds", "holiday"], inplace=True) 66 | 67 | # Convert to ASCII, and drop holidays that fail to convert 68 | generated_holidays["holiday"] = generated_holidays["holiday"].apply(utf8_to_ascii) 69 | assert "FAILED_TO_PARSE" not in generated_holidays["holiday"].unique() 70 | generated_holidays.to_csv("../R/data-raw/generated_holidays.csv", index=False) 71 | -------------------------------------------------------------------------------- /src/forecastga/helpers/logging.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # coding: utf-8 3 | # 4 | 5 | """ForecastGA: Logging Utility""" 6 | 7 | import logging 8 | from logging import DEBUG, INFO, ERROR, Formatter, getLogger 9 | 10 | # file output 11 | FILE_HANDLER = logging.FileHandler(filename="forecastga.error.log") 12 | 13 | FILE_HANDLER.setFormatter( 14 | Formatter("%(asctime)s [%(levelname)s]" " %(name)s,%(lineno)s %(message)s") 15 | ) 16 | FILE_HANDLER.setLevel(DEBUG) 17 | 18 | # console output 19 | CONSOLE_HANDLER = logging.StreamHandler() 20 | CONSOLE_HANDLER.setLevel(ERROR) 21 | CONSOLE_HANDLER.setFormatter(Formatter("%(message)s")) 22 | 23 | SDCT_LOGGER = getLogger("forecastga") 24 | 25 | # add handlers 26 | SDCT_LOGGER.addHandler(CONSOLE_HANDLER) 27 | SDCT_LOGGER.addHandler(FILE_HANDLER) 28 | SDCT_LOGGER.setLevel(DEBUG) 29 | 30 | logging.captureWarnings(True) 31 | 32 | 33 | def get_logger(log_name, level=INFO): 34 | """ 35 | :param level: CRITICAL = 50 36 | ERROR = 40 37 | WARNING = 30 38 | INFO = 20 39 | DEBUG = 10 40 | NOTSET = 0 41 | :type log_name: str 42 | :type level: int 43 | """ 44 | module_logger = SDCT_LOGGER.getChild(log_name) 45 | if level: 46 | module_logger.setLevel(level) 47 | return module_logger 48 | -------------------------------------------------------------------------------- /src/forecastga/helpers/ssa.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | 5 | """ForecastGA: SSA Helper""" 6 | 7 | import numpy as np 8 | import pandas as pd 9 | from numpy import matrix as m 10 | from scipy import linalg 11 | 12 | 13 | class mySSA(object): 14 | """Singular Spectrum Analysis object""" 15 | 16 | def __init__(self, time_series): 17 | 18 | self.ts = pd.DataFrame(time_series) 19 | self.ts_name = self.ts.columns.tolist()[0] 20 | if self.ts_name == 0: 21 | self.ts_name = "ts" 22 | self.ts_v = self.ts.values 23 | self.ts_N = self.ts.shape[0] 24 | self.freq = self.ts.index.inferred_freq 25 | 26 | @staticmethod 27 | def _dot(x, y): 28 | """Alternative formulation of dot product to allow missing values in arrays/matrices""" 29 | pass 30 | 31 | @staticmethod 32 | def get_contributions(X=None, s=None, plot=True): 33 | """Calculate the relative contribution of each of the singular values""" 34 | lambdas = np.power(s, 2) 35 | frob_norm = np.linalg.norm(X) 36 | ret = pd.DataFrame(lambdas / (frob_norm ** 2), columns=["Contribution"]) 37 | ret["Contribution"] = ret.Contribution.round(4) 38 | if plot: 39 | ax = ret[ret.Contribution != 0].plot.bar(legend=False) 40 | ax.set_xlabel("Lambda_i") 41 | ax.set_title("Non-zero contributions of Lambda_i") 42 | vals = ax.get_yticks() 43 | ax.set_yticklabels(["{:3.2f}%".format(x * 100) for x in vals]) 44 | return ax 45 | return ret[ret.Contribution > 0] 46 | 47 | @staticmethod 48 | def diagonal_averaging(hankel_matrix): 49 | """Performs anti-diagonal averaging from given hankel matrix 50 | Returns: Pandas DataFrame object containing the reconstructed series""" 51 | mat = m(hankel_matrix) 52 | L, K = mat.shape 53 | L_star, K_star = min(L, K), max(L, K) 54 | # new = np.zeros((L, K)) 55 | if L > K: 56 | mat = mat.T 57 | ret = [] 58 | 59 | # Diagonal Averaging 60 | for k in range(1 - K_star, L_star): 61 | mask = np.eye(K_star, k=k, dtype="bool")[::-1][:L_star, :] 62 | mask_n = sum(sum(mask)) 63 | ma = np.ma.masked_array(mat.A, mask=1 - mask) 64 | ret += [ma.sum() / mask_n] 65 | 66 | return pd.DataFrame(ret).rename(columns={0: "Reconstruction"}) 67 | 68 | def view_time_series(self): 69 | """Plot the time series""" 70 | self.ts.plot(title="Original Time Series") 71 | 72 | def embed( 73 | self, 74 | embedding_dimension=None, 75 | suspected_frequency=None, 76 | verbose=False, 77 | return_df=False, 78 | ): 79 | """Embed the time series with embedding_dimension window size. 
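The embedding builds an L x K Hankel (trajectory) matrix of lagged windows, where L is the embedding dimension (default: half the series length) and K = N - L + 1.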
80 | Optional: suspected_frequency changes embedding_dimension such that it is divisible by suspected frequency""" 81 | if not embedding_dimension: 82 | self.embedding_dimension = self.ts_N // 2 83 | else: 84 | self.embedding_dimension = embedding_dimension 85 | if suspected_frequency: 86 | self.suspected_frequency = suspected_frequency 87 | self.embedding_dimension = ( 88 | self.embedding_dimension // self.suspected_frequency 89 | ) * self.suspected_frequency 90 | 91 | self.K = self.ts_N - self.embedding_dimension + 1 92 | self.X = m(linalg.hankel(self.ts, np.zeros(self.embedding_dimension))).T[ 93 | :, : self.K 94 | ] 95 | self.X_df = pd.DataFrame(self.X) 96 | self.X_complete = self.X_df.dropna(axis=1) 97 | self.X_com = m(self.X_complete.values) 98 | self.X_missing = self.X_df.drop(self.X_complete.columns, axis=1) 99 | self.X_miss = m(self.X_missing.values) 100 | self.trajectory_dimentions = self.X_df.shape 101 | self.complete_dimensions = self.X_complete.shape 102 | self.missing_dimensions = self.X_missing.shape 103 | self.no_missing = self.missing_dimensions[1] == 0 104 | 105 | if return_df: 106 | return self.X_df 107 | 108 | def decompose(self, verbose=False): 109 | """Perform the Singular Value Decomposition and identify the rank of the embedding subspace 110 | Characteristic of projection: the proportion of variance captured in the subspace""" 111 | X = self.X_com 112 | self.S = X * X.T 113 | self.U, self.s, self.V = linalg.svd(self.S) 114 | self.U, self.s, self.V = m(self.U), np.sqrt(self.s), m(self.V) 115 | self.d = np.linalg.matrix_rank(X) 116 | Vs, Xs, Ys, Zs = {}, {}, {}, {} 117 | for i in range(self.d): 118 | Zs[i] = self.s[i] * self.V[:, i] 119 | Vs[i] = X.T * (self.U[:, i] / self.s[i]) 120 | Ys[i] = self.s[i] * self.U[:, i] 121 | Xs[i] = Ys[i] * (m(Vs[i]).T) 122 | self.Vs, self.Xs = Vs, Xs 123 | self.s_contributions = self.get_contributions(X, self.s, False) 124 | self.r = len(self.s_contributions[self.s_contributions > 0]) 125 | self.r_characteristic = round( 126 | (self.s[: self.r] ** 2).sum() / (self.s ** 2).sum(), 4 127 | ) 128 | self.orthonormal_base = {i: self.U[:, i] for i in range(self.r)} 129 | 130 | def view_s_contributions( 131 | self, adjust_scale=False, cumulative=False, return_df=False 132 | ): 133 | """View the contribution to variance of each singular value and its corresponding signal""" 134 | contribs = self.s_contributions.copy() 135 | contribs = contribs[contribs.Contribution != 0] 136 | if cumulative: 137 | contribs["Contribution"] = contribs.Contribution.cumsum() 138 | if adjust_scale: 139 | contribs = (1 / contribs).max() * 1.1 - (1 / contribs) 140 | ax = contribs.plot.bar(legend=False) 141 | ax.set_xlabel("Singular_i") 142 | ax.set_title( 143 | "Non-zero{} contribution of Singular_i {}".format( 144 | " cumulative" if cumulative else "", "(scaled)" if adjust_scale else "" 145 | ) 146 | ) 147 | if adjust_scale: 148 | ax.axes.get_yaxis().set_visible(False) 149 | vals = ax.get_yticks() 150 | ax.set_yticklabels(["{:3.0f}%".format(x * 100) for x in vals]) 151 | if return_df: 152 | return contribs 153 | 154 | @classmethod 155 | def view_reconstruction( 156 | cls, *hankel, names=None, return_df=False, plot=True, symmetric_plots=False 157 | ): 158 | """Visualise the reconstruction of the hankel matrix/matrices passed to *hankel""" 159 | hankel_mat = None 160 | for han in hankel: 161 | if isinstance(hankel_mat, m): 162 | hankel_mat = hankel_mat + han 163 | else: 164 | hankel_mat = han.copy() 165 | hankel_full = cls.diagonal_averaging(hankel_mat) 166 | title = "Reconstruction of signal" 167 | if names or names == 0: 168 | title += " associated with singular value{}: {}" 169 | title = title.format("" if len(str(names)) == 1 else "s", names) 170 | if plot: 171 | ax = hankel_full.plot(legend=False, title=title) 172 | if symmetric_plots: 173 | velocity = hankel_full.abs().max()[0] 174 | ax.set_ylim(bottom=-velocity, top=velocity) 175 | if return_df: 176 | return hankel_full 177 | 178 | def _forecast_prep(self, singular_values=None): 179 | self.X_com_hat = np.zeros(self.complete_dimensions) 180 | self.verticality_coefficient = 0 181 | self.forecast_orthonormal_base = {} 182 | if singular_values: 183 | try: 184 | for i in singular_values: 185 | self.forecast_orthonormal_base[i] = self.orthonormal_base[i] 186 | except Exception: 187 | if singular_values == 0: 188 | self.forecast_orthonormal_base[0] = self.orthonormal_base[0] 189 | else: 190 | raise ValueError( 191 | "Please pass in a list/array of singular value indices to use for forecast" 192 | ) 193 | else: 194 | self.forecast_orthonormal_base = self.orthonormal_base 195 | self.R = np.zeros(self.forecast_orthonormal_base[0].shape)[:-1] 196 | for Pi in self.forecast_orthonormal_base.values(): 197 | self.X_com_hat += Pi * Pi.T * self.X_com 198 | pi = np.ravel(Pi)[-1] 199 | self.verticality_coefficient += pi ** 2 200 | self.R += pi * Pi[:-1] 201 | self.R = m(self.R / (1 - self.verticality_coefficient)) 202 | self.X_com_tilde = self.diagonal_averaging(self.X_com_hat) 203 | 204 | def forecast_recurrent( 205 | self, 206 | steps_ahead=12, 207 | singular_values=None, 208 | plot=False, 209 | return_df=False, 210 | **plotargs 211 | ): 212 | """Forecast from last point of original time series up to steps_ahead using recurrent methodology 213 | This method also fills any missing data from the original time series.""" 214 | try: 215 | self.X_com_hat 216 | except AttributeError: 217 | self._forecast_prep(singular_values) 218 | self.ts_forecast = np.array(self.ts_v[0]) 219 | for i in range(1, self.ts_N + steps_ahead): 220 | try: 221 | if np.isnan(self.ts_v[i]): 222 | x = ( 223 | self.R.T 224 | * m(self.ts_forecast[max(0, i - self.R.shape[0]) : i]).T 225 | ) 226 | self.ts_forecast = np.append(self.ts_forecast, x[0]) 227 | else: 228 | self.ts_forecast = np.append(self.ts_forecast, self.ts_v[i]) 229 | except IndexError: 230 | x = self.R.T * m(self.ts_forecast[i - self.R.shape[0] : i]).T 231 | self.ts_forecast = np.append(self.ts_forecast, x[0]) 232 | self.forecast_N = i + 1 233 | new_index = pd.date_range( 234 | start=self.ts.index.min(), periods=self.forecast_N, freq=self.freq 235 | ) 236 | forecast_df = pd.DataFrame( 237 | self.ts_forecast, columns=["Forecast"], index=new_index 238 | ) 239 | forecast_df["Original"] = np.append(self.ts_v, [np.nan] * steps_ahead) 240 | if plot: 241 | forecast_df.plot(title="Forecasted vs. original time series", **plotargs) 242 | if return_df: 243 | return forecast_df 244 | -------------------------------------------------------------------------------- /src/forecastga/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!
/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | """ ForecastGA: Models """ 5 | 6 | MODELS = { 7 | "ARIMA": { 8 | "name": "ARIMA", 9 | "description": "Automated ARIMA Modelling", 10 | "loc": "forecastga.models.arima", 11 | "class": "ARIMA_Model", 12 | "status": "active", 13 | }, 14 | "Prophet": { 15 | "name": "Prophet", 16 | "description": "Modeling Multiple Seasonality With Linear or Non-linear Growth", 17 | "loc": "forecastga.models.prophet", 18 | "class": "Prophet_Model", 19 | "status": "active", 20 | }, 21 | "ProphetBC": { 22 | "name": "ProphetBC", 23 | "description": "Prophet Model with Box-Cox transform of the data", 24 | "loc": "forecastga.models.prophet_boxcox", 25 | "class": "Prophet_BoxCox_Model", 26 | "status": "active", 27 | }, 28 | "SARIMA": { 29 | "name": "SARIMA", 30 | "description": "A seasonal autoregressive integrated moving average (SARIMA) model", 31 | "loc": "forecastga.models.sarima", 32 | "class": "SARIMA_Model", 33 | "status": "not implemented", 34 | }, 35 | "SARIMAX": { 36 | "name": "SARIMAX", 37 | "description": "Example: https://gist.github.com/natzir/befe1ff229fc2d0c01e0411d5fdd5209", 38 | "loc": "forecastga.models.sarimax", 39 | "class": "SARIMAX_Model", 40 | "status": "not implemented", 41 | }, 42 | "HWAAS": { 43 | "name": "HWAAS", 44 | "description": "Exponential Smoothing With Additive Trend and Additive Seasonality", 45 | "loc": "forecastga.models.hwaas", 46 | "class": "HWAAS_Model", 47 | "status": "active", 48 | }, 49 | "HWAMS": { 50 | "name": "HWAMS", 51 | "description": "Exponential Smoothing with Additive Trend and Multiplicative Seasonality", 52 | "loc": "forecastga.models.hwams", 53 | "class": "HWAMS_Model", 54 | "status": "active", 55 | }, 56 | "NBEATS": { 57 | "name": "NBEATS", 58 | "description": "Neural basis expansion analysis (now fixed at 20 Epochs)", 59 | "loc": "forecastga.models.nbeats", 60 | "class": "NBEATS_Model", 61 | "status": "active", 62 | }, 63 | "Gluonts": { 64 | "name": "Gluonts", 65 | "description": "RNN-based Model (now fixed at 20 Epochs)", 66 | "loc": "forecastga.models.gluonts", 67 | "class": "Gluonts_Model", 68 | "status": "active", 69 | }, 70 | "TATS": { 71 | "name": "TATS", 72 | "description": "Seasonal and Trend no Box Cox", 73 | "loc": "forecastga.models.tats", 74 | "class": "TATS_Model", 75 | "status": "active", 76 | }, 77 | "TBAT": { 78 | "name": "TBAT", 79 | "description": "Trend and Box Cox", 80 | "loc": "forecastga.models.tbat", 81 | "class": "TBAT_Model", 82 | "status": "active", 83 | }, 84 | "TBATS1": { 85 | "name": "TBATS1", 86 | "description": "Trend, Seasonal (one), and Box Cox", 87 | "loc": "forecastga.models.tbats1", 88 | "class": "TBATS1_Model", 89 | "status": "active", 90 | }, 91 | "TBATP1": { 92 | "name": "TBATP1", 93 | "description": "TBATS1 but Seasonal Inference is Hardcoded by Periodicity", 94 | "loc": "forecastga.models.tbatp1", 95 | "class": "TBATP1_Model", 96 | "status": "active", 97 | }, 98 | "TBATS2": { 99 | "name": "TBATS2", 100 | "description": "TBATS1 With Two Seasonal Periods", 101 | "loc": "forecastga.models.tbats2", 102 | "class": "TBATS2_Model", 103 | "status": "active", 104 | }, 105 | "PYAF": { 106 | "name": "PYAF", 107 | "description": "PYAF", 108 | "loc": "forecastga.models.pyaf", 109 | "class": "PYAF_Model", 110 | "status": "active but not tested", 111 | }, 112 | } 113 | -------------------------------------------------------------------------------- /src/forecastga/models/arima.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | 5 | """ForecastGA: ARIMA Model""" 6 | 7 | import pmdarima as pm 8 | 9 | from forecastga.models.base import BaseModel 10 | 11 | 12 | class ARIMA_Model(BaseModel): 13 | """ARIMA Model Class""" 14 | 15 | def __init__(self, config): 16 | super().__init__(config) 17 | 18 | def train(self, **kwargs): 19 | self.model = pm.auto_arima(self.train_df, seasonal=True, m=self.seasons) 20 | 21 | def forecast(self): 22 | self.prediction = self.model.predict(self.forecast_len) 23 | -------------------------------------------------------------------------------- /src/forecastga/models/base.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # coding: utf-8 3 | # 4 | 5 | """ForecastGA: Base Model""" 6 | 7 | 8 | class BaseModel: 9 | """Base Model class of ForecastGA""" 10 | 11 | def __init__(self, config): 12 | 13 | if config.in_sample is None: 14 | raise ValueError( 15 | "The config class must be initialized with \ 16 | `set_in_sample()` or `set_out_sample()` prior to \ 17 | passing to a model." 18 | ) 19 | 20 | self.seasonality = config.seasonality 21 | self.forecast_len = config.forecast_len 22 | self.freq = config.freq 23 | self.in_sample = config.in_sample 24 | self.GPU = config.GPU 25 | 26 | self.dataframe = config.dataframe 27 | self.train_df = config.train_df 28 | self.forecast_df = config.forecast_df 29 | self.seasons = config.seasons 30 | self.periods = config.periods 31 | self.model = None 32 | self.prediction = None 33 | 34 | def train(self): 35 | raise NotImplementedError 36 | 37 | def forecast(self): 38 | raise NotImplementedError 39 | -------------------------------------------------------------------------------- /src/forecastga/models/gluonts.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # coding: utf-8 3 | # 4 | 5 | """ForecastGA: Gluonts Model""" 6 | import pandas as pd 7 | 8 | from gluonts.model.deepar import DeepAREstimator 9 | from gluonts.trainer import Trainer 10 | from gluonts.dataset.common import ListDataset 11 | 12 | from forecastga.models.base import BaseModel 13 | 14 | 15 | class Gluonts_Model(BaseModel): 16 | """Gluonts Model Class""" 17 | 18 | def __init__(self, config): 19 | super().__init__(config) 20 | 21 | def train(self, **kwargs): 22 | 23 | epochs = kwargs.get("epochs", 10) 24 | 25 | # Adjust class freq. 
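# GluonTS wants a pandas offset alias here; "MS" (month start) is mapped to plain "M" below, presumably because the gluonts version targeted by this module only recognizes the bare monthly alias.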
26 | self.freq = pd.infer_freq(self.train_df.index) 27 | if self.freq == "MS": 28 | self.freq = "M" 29 | 30 | estimator = DeepAREstimator( 31 | freq=self.freq, 32 | prediction_length=self.forecast_len, 33 | trainer=Trainer( 34 | epochs=epochs, batch_size=64, ctx="gpu" if self.GPU else "cpu" 35 | ), 36 | ) 37 | 38 | self.model = estimator.train( 39 | training_data=self.format_input(self.train_df, self.freq) 40 | ) 41 | 42 | def forecast(self): 43 | 44 | if self.in_sample: 45 | forecast = self.model.predict( 46 | self.format_input( 47 | self.dataframe["Target"], 48 | self.freq, 49 | self.train_df.index[-1] 50 | + self.train_df.index.to_series().diff().min(), 51 | ) 52 | ) 53 | else: 54 | forecast = self.model.predict( 55 | self.format_input( 56 | self.dataframe["Target"], 57 | self.freq, 58 | self.dataframe["Target"].index[-1] 59 | + self.train_df.index.to_series().diff().min(), 60 | ) 61 | ) 62 | 63 | self.prediction = list(forecast)[0].samples.mean(axis=0) # .quantile(0.5) 64 | 65 | @staticmethod 66 | def format_input(df, freq, target=None): 67 | if target: 68 | return ListDataset( 69 | [{"start": df.index[0], "target": df.to_frame().Target[:target]}], 70 | freq=freq, 71 | ) 72 | 73 | return ListDataset( 74 | [{"start": df.index[0], "target": df.to_frame().Target}], freq=freq 75 | ) 76 | -------------------------------------------------------------------------------- /src/forecastga/models/hwaas.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # coding: utf-8 3 | # 4 | 5 | """ForecastGA: HWAAS Model""" 6 | 7 | from statsmodels.tsa.holtwinters import ExponentialSmoothing 8 | 9 | from forecastga.models.base import BaseModel 10 | 11 | 12 | class HWAAS_Model(BaseModel): 13 | """HWAAS Model Class""" 14 | 15 | def __init__(self, config): 16 | super().__init__(config) 17 | 18 | def train(self, **kwargs): 19 | for use_boxcox in (True, False): 20 | # try the Box-Cox transform first, then fall back to untransformed data 21 | try: 22 | self.model = ExponentialSmoothing( 23 | self.train_df, 24 | seasonal_periods=self.seasons, 25 | trend="add", 26 | seasonal="add", 27 | damped_trend=True, 28 | ).fit(use_boxcox=use_boxcox) 29 | break 30 | except Exception: 31 | continue 32 | 33 | def forecast(self): 34 | self.prediction = self.model.forecast(self.forecast_len) 35 | -------------------------------------------------------------------------------- /src/forecastga/models/hwams.py: -------------------------------------------------------------------------------- 1 | #!
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: HWAMS Model"""
6 | 
7 | from statsmodels.tsa.holtwinters import ExponentialSmoothing
8 | 
9 | from forecastga.models.base import BaseModel
10 | 
11 | 
12 | class HWAMS_Model(BaseModel):
13 |     """HWAMS Model Class"""
14 | 
15 |     def __init__(self, config):
16 |         super().__init__(config)
17 | 
18 |     def train(self, **kwargs):
19 | 
20 |         for i in range(3):
21 | 
22 |             params = [
23 |                 {"trend": "add", "seasonal": "mul", "use_boxcox": True},
24 |                 {"trend": "add", "seasonal": "mul", "use_boxcox": False},
25 |                 {"trend": None, "seasonal": "add", "use_boxcox": False},
26 |             ]
27 |             try:
28 |                 self.model = ExponentialSmoothing(
29 |                     self.train_df,
30 |                     seasonal_periods=self.seasons,
31 |                     trend=params[i]["trend"],
32 |                     seasonal=params[i]["seasonal"],
33 |                     damped_trend=params[i]["trend"] is not None,  # damping requires a trend
34 |                 ).fit(use_boxcox=params[i]["use_boxcox"])
35 |                 break
36 | 
37 |             except Exception:
38 |                 continue
39 | 
40 |     def forecast(self):
41 |         self.prediction = self.model.forecast(self.forecast_len)
42 | 
--------------------------------------------------------------------------------
/src/forecastga/models/nbeats.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: NBEATS Model"""
6 | 
7 | import os
8 | import numpy as np
9 | 
10 | import torch
11 | from torch import optim
12 | from torch.nn import functional as F
13 | from tqdm.auto import tqdm
14 | from nbeats_pytorch.model import (
15 |     NBeatsNet,
16 | )
17 | 
18 | 
19 | from forecastga.models.base import BaseModel
20 | 
21 | from forecastga.helpers.data import data_generator
22 | 
23 | 
24 | CHECKPOINT_NAME = "nbeats-training-checkpoint.th"
25 | 
26 | 
27 | class NBEATS_Model(BaseModel):
28 |     """NBEATS Model Class"""
29 | 
30 |     def __init__(self, config):
31 |         super().__init__(config)
32 | 
33 |     def train(self, **kwargs):
34 |         """Train NBEATS Model"""
35 | 
36 |         if os.path.isfile(CHECKPOINT_NAME):
37 |             os.remove(CHECKPOINT_NAME)
38 | 
39 |         steps = kwargs.get("steps", 50)
40 |         batch_size = kwargs.get("batch_size", 10)
41 |         patience = kwargs.get("patience", 5)
42 |         device = self.get_device()
43 | 
44 |         # Build the N-BEATS network (trend, seasonality, and generic stacks).
45 |         net = NBeatsNet(
46 |             stack_types=[
47 |                 NBeatsNet.TREND_BLOCK,
48 |                 NBeatsNet.SEASONALITY_BLOCK,
49 |                 NBeatsNet.GENERIC_BLOCK,
50 |             ],
51 |             forecast_length=self.forecast_len,
52 |             thetas_dims=kwargs.get("thetas_dims", [2, 8, 3]),
53 |             nb_blocks_per_stack=kwargs.get("nb_blocks_per_stack", 3),
54 |             backcast_length=self.forecast_len,
55 |             hidden_layer_units=kwargs.get("hidden_layer_units", 128),
56 |             share_weights_in_stack=False,
57 |             device=device,
58 |         )
59 | 
60 |         x_batch, y_batch, norm_constant = self.format_input(
61 |             self.dataframe, self.forecast_len
62 |         )
63 | 
64 |         c = len(x_batch)
65 |         if self.in_sample:
66 |             c -= self.forecast_len
67 | 
68 |         optimiser = optim.Adam(net.parameters())
69 | 
70 |         data = data_generator(x_batch[:c], y_batch[:c], batch_size)
71 | 
72 |         best_loss = float("inf")
73 |         counter = 0
74 | 
75 |         for _ in tqdm(range(steps)):
76 |             loss = self.train_100_grad_steps(data, device, net, optimiser)
77 |             if loss < best_loss:
78 |                 best_loss, counter = loss, 0
79 |             else:
80 |                 counter += 1
81 |                 if counter >= patience:
82 |                     break
83 | 
84 |         self.model = net
85 |         self.constant = norm_constant
86 | 
87 |     def forecast(self, **kwargs):
88 |         """Forecast NBEATS Model"""
89 | 
90 |         x_batch, _, _ = self.format_input(
91 |             self.dataframe, self.forecast_len, constant=self.constant
92 |         )
93 | 
94 |         c = len(x_batch)
95 |         if self.in_sample:
96 |             c -= self.forecast_len
97 | 
98 |         self.model.eval()
99 | 
100 |         if self.in_sample:
101 |             _, forecast = self.model(torch.tensor(x_batch[c:], dtype=torch.float))
102 |         else:
103 |             _, forecast = self.model(torch.tensor(x_batch, dtype=torch.float))
104 | 
105 |         p = forecast.cpu().detach().numpy() if self.GPU else forecast.detach().numpy()
106 |         self.prediction = p[-1] * self.constant
107 | 
108 |     def get_device(self):
109 |         return torch.device("cuda") if self.GPU else torch.device("cpu")
110 | 
111 |     @staticmethod
112 |     def format_input(df, forecast_length, constant=None):
113 | 
114 |         backcast_length = 1 * forecast_length
115 | 
116 |         x = df.values
117 |         norm_constant = constant if constant else np.max(x)
118 |         x = x / norm_constant
119 | 
120 |         x_batch, y_batch = [], []
121 | 
122 |         # Batches the results into x_train_batch: x and y_train_batch: x + forecast_length
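123 |         # For example, with backcast_length == forecast_length == 3 and len(x) == 10,
124 |         # the loop yields the window pairs (x[1:4], x[4:7]) through (x[4:7], x[7:10]).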
125 |         for i in range(backcast_length + 1, len(x) - forecast_length + 1):
126 |             x_batch.append(x[i - backcast_length : i])
127 |             y_batch.append(x[i : i + forecast_length])
128 | 
129 |         x_batch = np.array(x_batch)[..., 0]
130 |         y_batch = np.array(y_batch)[..., 0]
131 | 
132 |         return x_batch, y_batch, norm_constant
133 | 
134 |     def train_100_grad_steps(self, data, device, net, optimiser):
135 |         """Run gradient steps until the next 100-step checkpoint."""
136 |         global_step = self.load(net, optimiser)
137 |         global_step_init = global_step
138 |         step_loss = 0
139 |         for x_train_batch, y_train_batch in data:
140 |             global_step += 1
141 |             optimiser.zero_grad()
142 |             net.train()
143 |             _, forecast = net(torch.tensor(x_train_batch, dtype=torch.float).to(device))
144 |             loss = F.mse_loss(
145 |                 forecast, torch.tensor(y_train_batch, dtype=torch.float).to(device)
146 |             )
147 |             step_loss += loss.item()
148 |             loss.backward()
149 |             optimiser.step()
150 |             if global_step > 0 and global_step % 100 == 0:
151 |                 with torch.no_grad():
152 |                     self.save(net, optimiser, global_step)
153 |                 break
154 | 
155 |         return step_loss / (global_step - global_step_init)
156 | 
157 |     def load(self, model, optimiser):
158 |         """Load a training checkpoint, returning the stored global step."""
159 | 
160 |         if os.path.exists(CHECKPOINT_NAME):
161 |             checkpoint = torch.load(CHECKPOINT_NAME)
162 |             model.load_state_dict(checkpoint["model_state_dict"])
163 |             optimiser.load_state_dict(checkpoint["optimizer_state_dict"])
164 |             grad_step = checkpoint["grad_step"]
165 | 
166 |             return grad_step
167 |         return 0
168 | 
169 |     def save(self, model, optimiser, grad_step):
170 |         torch.save(
171 |             {
172 |                 "grad_step": grad_step,
173 |                 "model_state_dict": model.state_dict(),
174 |                 "optimizer_state_dict": optimiser.state_dict(),
175 |             },
176 |             CHECKPOINT_NAME,
177 |         )
178 | 
--------------------------------------------------------------------------------
/src/forecastga/models/prophet.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: Prophet Model"""
6 | 
7 | import pandas as pd
8 | from fbprophet import Prophet
9 | 
10 | from forecastga.models.base import BaseModel
11 | 
12 | 
13 | class Prophet_Model(BaseModel):
14 |     """Prophet Model Class"""
15 | 
16 |     def __init__(self, config):
17 |         super().__init__(config)
18 | 
19 |     def train(self, **kwargs):
20 | 
21 |         country_holidays = kwargs.get("country_holidays", "US")
22 | 
23 |         if self.freq == "D":
24 |             ptm = Prophet(weekly_seasonality=True)
25 |         else:
26 |             ptm = Prophet()
27 | 
28 |         ptm.add_country_holidays(country_name=country_holidays)
29 | 
30 |         self.model = ptm.fit(self.format_input(self.train_df))
31 | 
32 |     def forecast(self):
33 |         future = self.model.make_future_dataframe(
34 |             periods=self.forecast_len, freq=self.freq
35 |         )
36 |         future_pred = self.model.predict(future)
37 |         self.prediction = self.format_output(future_pred)[-self.forecast_len :]
38 | 
39 |     @staticmethod
40 |     def format_input(df):
41 |         df_pr = df.reset_index()
42 |         df_pr.columns = ["ds", "y"]
43 |         return df_pr
44 | 
45 |     @staticmethod
46 |     def format_output(df):
47 |         prophet_pred = pd.DataFrame({"Date": df["ds"], "Target": df["yhat"]})
48 |         prophet_pred = prophet_pred.set_index("Date")
49 |         return prophet_pred["Target"].values
50 | 
--------------------------------------------------------------------------------
/src/forecastga/models/prophet_boxcox.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: Prophet Model with Box-Cox transform of the data"""
6 | 
7 | import pandas as pd
8 | from fbprophet import Prophet
9 | from scipy.stats import boxcox
10 | from scipy.special import inv_boxcox
11 | 
12 | from forecastga.models.base import BaseModel
13 | 
14 | 
15 | class Prophet_BoxCox_Model(BaseModel):
16 |     """Prophet Boxcox Model Class"""
17 | 
18 |     def __init__(self, config):
19 |         self.boxcox_lambda = None
20 |         super().__init__(config)
21 | 
22 |     def train(self, **kwargs):
23 | 
24 |         country_holidays = kwargs.get("country_holidays", "US")
25 | 
26 |         if self.freq == "D":
27 |             ptm = Prophet(weekly_seasonality=True)
28 |         else:
29 |             ptm = Prophet()
30 | 
31 |         ptm.add_country_holidays(country_name=country_holidays)
32 | 
33 |         formatted_data = self.format_input(self.train_df)
34 |         transformed_y, self.boxcox_lambda = boxcox(formatted_data["y"] + 1)  # +1 keeps y positive
35 |         formatted_data["y"] = transformed_y
36 | 
37 |         self.model = ptm.fit(formatted_data)
38 | 
39 |     def forecast(self):
40 |         future = self.model.make_future_dataframe(
41 |             periods=self.forecast_len, freq=self.freq
42 |         )
43 |         future_pred = self.model.predict(future)
44 |         future_pred = future_pred[-self.forecast_len :]
45 |         if self.boxcox_lambda is not None:  # a lambda of 0 (log transform) is falsy
46 |             future_pred["yhat"] = (
47 |                 inv_boxcox(future_pred["yhat"], self.boxcox_lambda) - 1
48 |             )
49 |         self.prediction = self.format_output(future_pred)
50 | 
51 |     @staticmethod
52 |     def format_input(df):
53 |         df_pr = df.reset_index()
54 |         df_pr.columns = ["ds", "y"]
55 |         return df_pr
56 | 
57 |     @staticmethod
58 |     def format_output(df):
59 |         prophet_pred = pd.DataFrame({"Date": df["ds"], "Target": df["yhat"]})
60 |         prophet_pred = prophet_pred.set_index("Date")
61 |         return prophet_pred["Target"].values
62 | 
--------------------------------------------------------------------------------
/src/forecastga/models/pyaf.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: PYAF Model"""
6 | 
7 | import pyaf.ForecastEngine as autof
8 | 
9 | from forecastga.models.base import BaseModel
10 | 
11 | 
12 | class PYAF_Model(BaseModel):
13 |     """PYAF Model Class"""
14 | 
15 |     def __init__(self, config):
16 |         super().__init__(config)
17 | 
18 |     def train(self, **kwargs):
19 |         self.model = autof.cForecastEngine()
20 |         self.model.train(
21 |             iInputDS=self.train_df.reset_index(),
22 |             iTime="Date",
23 |             iSignal="Target",
24 |             iHorizon=len(self.train_df),
25 |         )
26 | 
27 |     def forecast(self):
28 |         forecast_df = self.model.forecast(
29 |             iInputDS=self.train_df.reset_index(), iHorizon=self.forecast_len
30 |         )
31 |         self.prediction = forecast_df["Target_Forecast"][-self.forecast_len :].values
32 | 
--------------------------------------------------------------------------------
/src/forecastga/models/sarima.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: SARIMA Model"""
6 | 
7 | from forecastga.models.base import BaseModel
8 | 
9 | 
10 | class SARIMA_Model:
11 |     """SARIMA Model Class"""
12 | 
13 |     def __init__(self):
14 |         raise NotImplementedError
15 | 
16 |     def dataframe(self):
17 |         raise NotImplementedError
18 | 
19 |     def train(self):
20 |         raise NotImplementedError
21 | 
22 |     def forecast(self):
23 |         raise NotImplementedError
24 | 
--------------------------------------------------------------------------------
/src/forecastga/models/sarimax.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: SARIMAX Model"""
6 | 
7 | from forecastga.models.base import BaseModel
8 | 
9 | 
10 | class SARIMAX_Model:
11 |     """SARIMAX Model Class"""
12 | 
13 |     def __init__(self):
14 |         raise NotImplementedError
15 | 
16 |     def dataframe(self):
17 |         raise NotImplementedError
18 | 
19 |     def train(self):
20 |         raise NotImplementedError
21 | 
22 |     def forecast(self):
23 |         raise NotImplementedError
24 | 
--------------------------------------------------------------------------------
/src/forecastga/models/tats.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: TATS Model"""
6 | 
7 | from tbats import TBATS
8 | 
9 | from forecastga.helpers.data import get_unique_N, season_list
10 | 
11 | from forecastga.models.base import BaseModel
12 | 
13 | 
14 | class TATS_Model(BaseModel):
15 |     """TATS Model Class"""
16 | 
17 |     def __init__(self, config):
18 |         super().__init__(config)
19 | 
20 |     def train(self, **kwargs):
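21 |         # season_list and get_unique_N come from forecastga.helpers.data; they
22 |         # appear to infer candidate seasonal periods and keep the first N unique ones.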
23 |         bat = TBATS(
24 |             seasonal_periods=list(get_unique_N(season_list(self.train_df), 1)),
25 |             use_arma_errors=False,
26 |             use_trend=True,
27 |         )
28 |         self.model = bat.fit(self.train_df)
29 | 
30 |     def forecast(self):
31 |         self.prediction = self.model.forecast(self.forecast_len)
32 | 
--------------------------------------------------------------------------------
/src/forecastga/models/tbat.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: TBAT Model"""
6 | 
7 | from tbats import TBATS
8 | 
9 | from forecastga.models.base import BaseModel
10 | 
11 | 
12 | class TBAT_Model(BaseModel):
13 |     """TBAT Model Class"""
14 | 
15 |     def __init__(self, config):
16 |         super().__init__(config)
17 | 
18 |     def train(self, **kwargs):
19 |         bat = TBATS(use_arma_errors=False, use_box_cox=True, use_trend=True)
20 |         self.model = bat.fit(self.train_df)
21 | 
22 |     def forecast(self):
23 |         self.prediction = self.model.forecast(self.forecast_len)
24 | 
--------------------------------------------------------------------------------
/src/forecastga/models/tbatp1.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: TBATP1 Model (TBATS1 but Seasonal Inference is Hardcoded by Periodicity)"""
6 | 
7 | from tbats import TBATS
8 | 
9 | from forecastga.models.base import BaseModel
10 | 
11 | 
12 | class TBATP1_Model(BaseModel):
13 |     """TBATP1 Model Class"""
14 | 
15 |     def __init__(self, config):
16 |         super().__init__(config)
17 | 
18 |     def train(self, **kwargs):
19 |         bat = TBATS(
20 |             seasonal_periods=[self.periods],
21 |             use_arma_errors=False,
22 |             use_box_cox=True,
23 |             use_trend=True,
24 |         )
25 |         self.model = bat.fit(self.train_df)
26 | 
27 |     def forecast(self):
28 |         self.prediction = self.model.forecast(self.forecast_len)
29 | 
--------------------------------------------------------------------------------
/src/forecastga/models/tbats1.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: TBATS1 Model (Trend, Seasonal-one, and Box Cox)"""
6 | 
7 | from tbats import TBATS
8 | 
9 | from forecastga.models.base import BaseModel
10 | 
11 | 
12 | class TBATS1_Model(BaseModel):
13 |     """TBATS1 Model Class"""
14 | 
15 |     def __init__(self, config):
16 |         super().__init__(config)
17 | 
18 |     def train(self, **kwargs):
19 |         bat = TBATS(
20 |             seasonal_periods=[self.seasons],
21 |             use_arma_errors=False,
22 |             use_box_cox=True,
23 |             use_trend=True,
24 |         )
25 |         self.model = bat.fit(self.train_df)
26 | 
27 |     def forecast(self):
28 |         self.prediction = self.model.forecast(self.forecast_len)
29 | 
--------------------------------------------------------------------------------
/src/forecastga/models/tbats2.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: TBATS2 Model (TBATS1 With Two Seasonal Periods)"""
6 | 
7 | from tbats import TBATS
8 | 
9 | from forecastga.helpers.data import get_unique_N, season_list
10 | 
11 | from forecastga.models.base import BaseModel
12 | 
13 | 
14 | class TBATS2_Model(BaseModel):
15 |     """TBATS2 Model Class"""
16 | 
17 |     def __init__(self, config):
18 |         super().__init__(config)
19 | 
20 |     def train(self, **kwargs):
21 |         bat = TBATS(
22 |             seasonal_periods=list(get_unique_N(season_list(self.train_df), 2)),
23 |             use_arma_errors=False,
24 |             use_box_cox=True,
25 |             use_trend=True,
26 |         )
27 |         self.model = bat.fit(self.train_df)
28 | 
29 |     def forecast(self):
30 |         self.prediction = self.model.forecast(self.forecast_len)
31 | 
--------------------------------------------------------------------------------
/src/forecastga/models/template.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding: utf-8
3 | #
4 | 
5 | """ForecastGA: Model Template (ARIMA shown as an example)"""
6 | 
7 | import pmdarima as pm
8 | 
9 | from forecastga.models.base import BaseModel
10 | 
11 | 
12 | class ARIMA_Model(BaseModel):
13 |     """ARIMA Model Class"""
14 | 
15 |     def __init__(self, config):
16 |         super().__init__(config)
17 | 
18 |     """
19 |     Available model attributes:
20 | 
21 |     self.seasonality (str)
22 |     self.forecast_len (int)
23 |     self.freq (str)
24 |     self.train_proportion (float)
25 |     self.in_sample (bool)
26 |     self.GPU (bool)
27 | 
28 |     self.dataframe (pd.Series)
29 |     self.train_df (pd.Series)
30 |     self.forecast_df (pd.Series) or None
31 |     self.seasons (int)
32 |     self.periods (int)
33 |     """
34 | 
35 |     def train(self, **kwargs):
36 |         self.model = pm.auto_arima(self.train_df, seasonal=True, m=self.seasons)
37 | 
38 |     def forecast(self):
39 |         if self.in_sample:
40 |             self.prediction = self.model.predict(self.forecast_len)
41 |             # Prediction can be a list, NumPy array, or pandas Series.
42 |         else:
43 |             # Do something else if out-of-sample.
44 |             pass
45 | 
46 |     @staticmethod
47 |     def format_input(df, forecast_length, constant=None):
48 |         pass
49 | 
50 |     @staticmethod
51 |     def format_output(df, forecast_length, constant=None):
52 |         pass
53 | 
--------------------------------------------------------------------------------
/src/forecastga/stan/unix/prophet.stan:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
5 | 
6 | functions {
7 |   matrix get_changepoint_matrix(vector t, vector t_change, int T, int S) {
8 |     // Assumes t and t_change are sorted.
9 |     matrix[T, S] A;
10 |     row_vector[S] a_row;
11 |     int cp_idx;
12 | 
13 |     // Start with an empty matrix.
14 |     A = rep_matrix(0, T, S);
15 |     a_row = rep_row_vector(0, S);
16 |     cp_idx = 1;
17 | 
18 |     // Fill in each row of A.
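19 |     // Illustration (not from the original source): with t = [0.1, 0.5, 0.9]
20 |     // and t_change = [0.4], the loop below produces A = [[0], [1], [1]];
21 |     // row i flags every changepoint already reached by time t[i].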
22 |     for (i in 1:T) {
23 |       while ((cp_idx <= S) && (t[i] >= t_change[cp_idx])) {
24 |         a_row[cp_idx] = 1;
25 |         cp_idx = cp_idx + 1;
26 |       }
27 |       A[i] = a_row;
28 |     }
29 |     return A;
30 |   }
31 | 
32 |   // Logistic trend functions
33 | 
34 |   vector logistic_gamma(real k, real m, vector delta, vector t_change, int S) {
35 |     vector[S] gamma;  // adjusted offsets, for piecewise continuity
36 |     vector[S + 1] k_s;  // actual rate in each segment
37 |     real m_pr;
38 | 
39 |     // Compute the rate in each segment
40 |     k_s = append_row(k, k + cumulative_sum(delta));
41 | 
42 |     // Piecewise offsets
43 |     m_pr = m;  // The offset in the previous segment
44 |     for (i in 1:S) {
45 |       gamma[i] = (t_change[i] - m_pr) * (1 - k_s[i] / k_s[i + 1]);
46 |       m_pr = m_pr + gamma[i];  // update for the next segment
47 |     }
48 |     return gamma;
49 |   }
50 | 
51 |   vector logistic_trend(
52 |     real k,
53 |     real m,
54 |     vector delta,
55 |     vector t,
56 |     vector cap,
57 |     matrix A,
58 |     vector t_change,
59 |     int S
60 |   ) {
61 |     vector[S] gamma;
62 | 
63 |     gamma = logistic_gamma(k, m, delta, t_change, S);
64 |     return cap .* inv_logit((k + A * delta) .* (t - (m + A * gamma)));
65 |   }
66 | 
67 |   // Linear trend function
68 | 
69 |   vector linear_trend(
70 |     real k,
71 |     real m,
72 |     vector delta,
73 |     vector t,
74 |     matrix A,
75 |     vector t_change
76 |   ) {
77 |     return (k + A * delta) .* t + (m + A * (-t_change .* delta));
78 |   }
79 | 
80 |   // Flat trend function
81 | 
82 |   vector flat_trend(
83 |     real m,
84 |     int T
85 |   ) {
86 |     return rep_vector(m, T);
87 |   }
88 | }
89 | 
90 | data {
91 |   int T;                // Number of time periods
92 |   int K;                // Number of regressors
93 |   vector[T] t;          // Time
94 |   vector[T] cap;        // Capacities for logistic trend
95 |   vector[T] y;          // Time series
96 |   int S;                // Number of changepoints
97 |   vector[S] t_change;   // Times of trend changepoints
98 |   matrix[T,K] X;        // Regressors
99 |   vector[K] sigmas;     // Scale on seasonality prior
100 |   real tau;             // Scale on changepoints prior
101 |   int trend_indicator;  // 0 for linear, 1 for logistic, 2 for flat
102 |   vector[K] s_a;        // Indicator of additive features
103 |   vector[K] s_m;        // Indicator of multiplicative features
104 | }
105 | 
106 | transformed data {
107 |   matrix[T, S] A;
108 |   A = get_changepoint_matrix(t, t_change, T, S);
109 | }
110 | 
111 | parameters {
112 |   real k;            // Base trend growth rate
113 |   real m;            // Trend offset
114 |   vector[S] delta;   // Trend rate adjustments
115 |   real sigma_obs;    // Observation noise
116 |   vector[K] beta;    // Regressor coefficients
117 | }
118 | 
119 | transformed parameters {
120 |   vector[T] trend;
121 |   if (trend_indicator == 0) {
122 |     trend = linear_trend(k, m, delta, t, A, t_change);
123 |   } else if (trend_indicator == 1) {
124 |     trend = logistic_trend(k, m, delta, t, cap, A, t_change, S);
125 |   } else if (trend_indicator == 2) {
126 |     trend = flat_trend(m, T);
127 |   }
128 | }
129 | 
130 | model {
131 |   //priors
132 |   k ~ normal(0, 5);
133 |   m ~ normal(0, 5);
134 |   delta ~ double_exponential(0, tau);
135 |   sigma_obs ~ normal(0, 0.5);
136 |   beta ~ normal(0, sigmas);
137 | 
138 |   // Likelihood
139 |   y ~ normal(
140 |     trend
141 |     .* (1 + X * (beta .* s_m))
142 |     + X * (beta .* s_a),
143 |     sigma_obs
144 |   );
145 | }
146 | 
--------------------------------------------------------------------------------
/src/forecastga/stan/win/prophet.stan:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
5 | 
6 | functions {
7 |   real[ , ] get_changepoint_matrix(real[] t, real[] t_change, int T, int S) {
8 |     // Assumes t and t_change are sorted.
9 |     real A[T, S];
10 |     real a_row[S];
11 |     int cp_idx;
12 | 
13 |     // Start with an empty matrix.
14 |     A = rep_array(0, T, S);
15 |     a_row = rep_array(0, S);
16 |     cp_idx = 1;
17 | 
18 |     // Fill in each row of A.
19 |     for (i in 1:T) {
20 |       while ((cp_idx <= S) && (t[i] >= t_change[cp_idx])) {
21 |         a_row[cp_idx] = 1;
22 |         cp_idx = cp_idx + 1;
23 |       }
24 |       A[i] = a_row;
25 |     }
26 |     return A;
27 |   }
28 | 
29 |   // Logistic trend functions
30 | 
31 |   real[] logistic_gamma(real k, real m, real[] delta, real[] t_change, int S) {
32 |     real gamma[S];    // adjusted offsets, for piecewise continuity
33 |     real k_s[S + 1];  // actual rate in each segment
34 |     real m_pr;
35 | 
36 |     // Compute the rate in each segment
37 |     k_s[1] = k;
38 |     for (i in 1:S) {
39 |       k_s[i + 1] = k_s[i] + delta[i];
40 |     }
41 | 
42 |     // Piecewise offsets
43 |     m_pr = m;  // The offset in the previous segment
44 |     for (i in 1:S) {
45 |       gamma[i] = (t_change[i] - m_pr) * (1 - k_s[i] / k_s[i + 1]);
46 |       m_pr = m_pr + gamma[i];  // update for the next segment
47 |     }
48 |     return gamma;
49 |   }
50 | 
51 |   real[] logistic_trend(
52 |     real k,
53 |     real m,
54 |     real[] delta,
55 |     real[] t,
56 |     real[] cap,
57 |     real[ , ] A,
58 |     real[] t_change,
59 |     int S,
60 |     int T
61 |   ) {
62 |     real gamma[S];
63 |     real Y[T];
64 | 
65 |     gamma = logistic_gamma(k, m, delta, t_change, S);
66 |     for (i in 1:T) {
67 |       Y[i] = cap[i] / (1 + exp(-(k + dot_product(A[i], delta))
68 |         * (t[i] - (m + dot_product(A[i], gamma)))));
69 |     }
70 |     return Y;
71 |   }
72 | 
73 |   // Linear trend function
74 | 
75 |   real[] linear_trend(
76 |     real k,
77 |     real m,
78 |     real[] delta,
79 |     real[] t,
80 |     real[ , ] A,
81 |     real[] t_change,
82 |     int S,
83 |     int T
84 |   ) {
85 |     real gamma[S];
86 |     real Y[T];
87 | 
88 |     for (i in 1:S) {
89 |       gamma[i] = -t_change[i] * delta[i];
90 |     }
91 |     for (i in 1:T) {
92 |       Y[i] = (k + dot_product(A[i], delta)) * t[i] + (
93 |         m + dot_product(A[i], gamma));
94 |     }
95 |     return Y;
96 |   }
97 | 
98 |   // Flat trend function
99 | 
100 |   real[] flat_trend(
101 |     real m,
102 |     int T
103 |   ) {
104 |     return rep_array(m, T);
105 |   }
106 | 
107 | 
108 | }
109 | 
110 | data {
111 |   int T;                // Number of time periods
112 |   int K;                // Number of regressors
113 |   real t[T];            // Time
114 |   real cap[T];          // Capacities for logistic trend
115 |   real y[T];            // Time series
116 |   int S;                // Number of changepoints
117 |   real t_change[S];     // Times of trend changepoints
118 |   real X[T,K];          // Regressors
119 |   vector[K] sigmas;     // Scale on seasonality prior
120 |   real tau;             // Scale on changepoints prior
121 |   int trend_indicator;  // 0 for linear, 1 for logistic, 2 for flat
122 |   real s_a[K];          // Indicator of additive features
123 |   real s_m[K];          // Indicator of multiplicative features
124 | }
125 | 
126 | transformed data {
127 |   real A[T, S];
128 |   A = get_changepoint_matrix(t, t_change, T, S);
129 | }
130 | 
131 | parameters {
132 |   real k;          // Base trend growth rate
133 |   real m;          // Trend offset
134 |   real delta[S];   // Trend rate adjustments
135 |   real sigma_obs;  // Observation noise
136 |   real beta[K];    // Regressor coefficients
137 | }
138 | 
139 | transformed parameters {
140 |   real trend[T];
141 |   real Y[T];
142 |   real beta_m[K];
143 |   real beta_a[K];
144 | 
145 |   if (trend_indicator == 0) {
146 |     trend = linear_trend(k, m, delta, t, A, t_change, S, T);
147 |   } else if (trend_indicator == 1) {
148 |     trend = logistic_trend(k, m, delta, t, cap, A, t_change, S, T);
149 |   } else if (trend_indicator == 2) {
150 |     trend = flat_trend(m, T);
151 |   }
152 | 
153 |   for (i in 1:K) {
154 |     beta_m[i] = beta[i] * s_m[i];
155 |     beta_a[i] = beta[i] * s_a[i];
156 |   }
157 | 
158 |   for (i in 1:T) {
159 |     Y[i] = (
160 |       trend[i] * (1 + dot_product(X[i], beta_m)) + dot_product(X[i], beta_a)
161 |     );
162 |   }
163 | }
164 | 
165 | model {
166 |   //priors
167 |   k ~ normal(0, 5);
168 |   m ~ normal(0, 5);
169 |   delta ~ double_exponential(0, tau);
170 |   sigma_obs ~ normal(0, 0.5);
171 |   beta ~ normal(0, sigmas);
172 | 
173 |   // Likelihood
174 |   y ~ normal(Y, sigma_obs);
175 | }
176 | 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | # Libraries
2 | import pandas as pd
3 | import numpy as np
4 | import re
5 | from datetime import datetime
6 | from dateutil.rrule import rrule, MONTHLY
7 | 
8 | import matplotlib.pyplot as plt
9 | 
10 | import json
11 | import forecastga
12 | import forecastga.googleanalytics as ga
13 | 
14 | 
15 | # Logging
16 | import logging
17 | 
18 | logging.basicConfig(level=logging.DEBUG)
19 | log = logging.getLogger(__name__)
20 | 
21 | 
22 | from types import SimpleNamespace
23 | 
24 | with open("identity.json") as f:
25 |     jf = json.load(f)
26 |     identify_json = SimpleNamespace(**jf)
27 | 
28 | 
29 | # @title Google Analytics
30 | ga_url = "https://analytics.google.com/analytics/web/?authuser=2#/report-home/a49839941w81675857p84563570"  # @param {type:"string"}
31 | ga_segment = "organic traffic"  # @param ["all users", "organic traffic", "direct traffic", "referral traffic", "mobile traffic", "tablet traffic"] {type:"string"}
32 | ga_metric = "sessions"  # @param ["sessions", "pageviews", "unique pageviews", "transactions"] {type:"string"}
33 | 
34 | 
35 | # @title Historical Data
36 | 
37 | # @markdown #### Date Range:
38 | ga_start_date = "2018-01-01"  # @param {type:"date"}
39 | ga_end_date = "2019-12-31"  # @param {type:"date",name:"GA Date"}
40 | 
41 | # @markdown ***
42 | # @markdown OR
43 | 
44 | # @markdown #### Prior Months:
45 | prior_months = 0  # @param {type:"integer"}
46 | 
47 | # @title Prediction Data
48 | 
49 | future_months = 2  # @param {type:"slider", min:1, max:24, step:1}
50 | # @markdown ---
51 | # @markdown `max_available_volume` is the total possible daily volume for a niche/geo. This helps keep the algorithm honest by putting a max possible upper bound on prediction.
52 | max_available_volume = 12222  # @param {type:"integer", hint:"this is a description"}
53 | # @markdown ---
54 | # @markdown `omit_values_over` is a way to clean your existing data to remove one-time spikes, caused by a rare event that is unlikely to happen again. This keeps the algorithm from using this data in its future predictions.
55 | omit_values_over = 2000000  # @param {type:"integer"}
56 | # @markdown ---
57 | save_output = False  # @param {type:"boolean"}
58 | 
59 | 
60 | try:
61 |     profile = ga.authenticate(
62 |         client_id=identify_json.client_id,
63 |         client_secret=identify_json.client_secret,
64 |         identity=identify_json.identity,
65 |         ga_url=ga_url,
66 |         interactive=True,
67 |     )
68 |     print("Authenticated")
69 | except Exception as e:
70 |     print("An error occurred:", str(e))
71 | 
72 | 
73 | class Struct:
74 |     def __init__(self, **entries):
75 |         self.__dict__.update(entries)
76 | 
77 | 
78 | def p_date(_dt):
79 |     return datetime.strftime(_dt, "%Y-%m-%d")
80 | 
81 | 
82 | def get_months(start_date, end_date):
83 |     strt_dt = datetime.strptime(start_date, "%Y-%m-%d")
84 |     end_dt = datetime.strptime(end_date, "%Y-%m-%d")
85 |     return rrule(MONTHLY, dtstart=strt_dt, until=end_dt).count()
86 | 
87 | 
88 | def get_ga_data(profile, data):
89 | 
90 |     try:
91 | 
92 |         if data.prior_months and int(data.prior_months) > 0:
93 |             print("Pulling {} prior months data.".format(data.prior_months))
94 |             sessions = (
95 |                 profile.core.query.metrics(data.ga_metric)
96 |                 .segment(data.ga_segment)
97 |                 .daily(months=0 - int(data.prior_months))
98 |                 .report
99 |             )
100 |         else:
101 |             print(
102 |                 "Pulling data from {} to {}.".format(
103 |                     data.ga_start_date, data.ga_end_date
104 |                 )
105 |             )
106 |             sessions = (
107 |                 profile.core.query.metrics(data.ga_metric)
108 |                 .segment(data.ga_segment)
109 |                 .daily(data.ga_start_date, data.ga_end_date)
110 |                 .report
111 |             )
112 | 
113 |     except Exception as e:
114 |         print("Error retrieving data from Google Analytics.", str(e))
115 |         return None
116 | 
117 |     df = sessions.as_dataframe()
118 | 
119 |     df["date"] = pd.to_datetime(df["date"])
120 | 
121 |     # Clean data.
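122 |     # Spikes above `omit_values_over` and sub-1 (empty) days are set to NaN
123 |     # and then dropped, so one-off anomalies don't feed the forecast models.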
124 |     if data.omit_values_over and int(data.omit_values_over) > 0:
125 |         df.loc[df[data.ga_metric] > data.omit_values_over, data.ga_metric] = np.nan
126 | 
127 |     df.loc[df[data.ga_metric] < 1, data.ga_metric] = np.nan
128 | 
129 |     df.dropna(inplace=True, axis=0)
130 | 
131 |     print(
132 |         "Rows: {rows} Min Date: {min_date} Max Date: {max_date}".format(
133 |             rows=len(df), min_date=p_date(df.date.min()), max_date=p_date(df.date.max())
134 |         )
135 |     )
136 |     # Backfilling missing values
137 |     df = df.set_index("date").asfreq("d", method="bfill")
138 | 
139 |     return df
140 | 
141 | 
142 | data = Struct(
143 |     **{
144 |         "ga_segment": ga_segment,
145 |         "ga_metric": ga_metric,
146 |         "ga_start_date": ga_start_date,
147 |         "ga_end_date": ga_end_date,
148 |         "prior_months": prior_months,
149 |         "omit_values_over": omit_values_over,
150 |     }
151 | )
152 | 
153 | 
154 | datafile = get_ga_data(profile, data)
155 | 
156 | print(datafile.head())
157 | 
158 | 
159 | model_list = ["TATS", "TBATS1", "TBATP1", "TBATS2", "ARIMA", "Gluonts"]
160 | 
161 | am = forecastga.AutomatedModel(
162 |     df=datafile["sessions"], model_list=model_list, forecast_len=30
163 | )
164 | 
165 | forecast_frame, performance = am.forecast_insample()
166 | 
167 | forecast_frame.head()
168 | 
--------------------------------------------------------------------------------
/tests/googleanalytics/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | 
3 | """
4 | These unit tests are somewhat limited in scope because they need
5 | to work with any Google Analytics data. Therefore, we mainly test
6 | for coherence and whether various functions return the proper
7 | data structure, rather than whether the results are exactly
8 | such or so.
9 | 
10 | Before you can run these tests, create a "sandbox" project at
11 | https://console.developers.google.com/ and run `gash auth`
12 | to authenticate against it. Your human-readable account name
13 | should be `pyga-unittest`.
14 | 
15 | The account you're using for these unit tests should have
16 | at least one Google Analytics domain set up.
17 | """
18 | 
19 | import googleanalytics as ga
20 | import unittest
21 | import datetime
22 | 
23 | from . import meta, query, report
24 | 
--------------------------------------------------------------------------------
/tests/googleanalytics/base.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | 
3 | import unittest
4 | 
5 | import googleanalytics as ga
6 | 
7 | 
8 | class TestCase(unittest.TestCase):
9 |     def setUp(self):
10 |         accounts = ga.authenticate()
11 |         if not len(accounts):
12 |             raise Exception(
13 |                 "Cannot proceed with unit testing: \
14 |                 the authorized Google account does not use Google Analytics."
15 |             )
16 |         else:
17 |             self.account = accounts[0]
18 |             self.webproperty = self.account.webproperties[0]
19 |             self.profile = self.webproperty.profiles[0]
20 |             self.query = self.profile.core.query
21 | 
--------------------------------------------------------------------------------
/tests/googleanalytics/meta.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | 
3 | import unittest
4 | 
5 | from . import base
6 | 
7 | 
8 | class TestAuthentication(base.TestCase):
9 |     """Test whether the various authentication procedures work,
10 |     whether they result in tokens, whether those tokens can be
11 |     revoked etc."""
12 | 
13 | 
14 | class TestMetaData(base.TestCase):
15 |     """Test whether various information about a Google Analytics
16 |     account can be accessed: webproperties, profiles, columns,
17 |     metrics, dimensions, segments."""
18 | 
19 |     def _test_addressable(self):
20 |         """ It should support multiple ways of pointing to a column. """
21 |         a = self.account.columns["pageviews"]
22 |         b = self.account.columns["Pageviews"]
23 |         c = self.account.columns["ga:pageviews"]
24 | 
25 |         self.assertEqual(a, b)
26 |         self.assertEqual(b, c)
27 | 
28 | 
29 | if __name__ == "__main__":
30 |     unittest.main()
31 | 
--------------------------------------------------------------------------------
/tests/googleanalytics/query.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | 
3 | import googleanalytics as ga
4 | import unittest
5 | import datetime
6 | 
7 | from . import base
8 | 
9 | 
10 | class TestQuerying(base.TestCase):
11 |     def test_raw(self):
12 |         """ It should allow people to construct raw queries. """
13 |         a = self.query.metrics("pageviews").range("2014-07-01", "2014-07-05")
14 |         b = (
15 |             self.query.set(metrics=["ga:pageviews"])
16 |             .set("start_date", "2014-07-01")
17 |             .set({"end_date": "2014-07-05"})
18 |         )
19 | 
20 |         self.assertEqual(a.raw, b.raw)
21 | 
22 |     def test_range_days(self):
23 |         """It should support various ways of defining date ranges,
24 |         and these will result in the correct start and end dates."""
25 |         a = self.query.metrics("pageviews").range("2014-07-01", "2014-07-05")
26 |         b = self.query.metrics("pageviews").range("2014-07-01", days=5)
27 | 
28 |         self.assertEqual(a.raw["start_date"], "2014-07-01")
29 |         self.assertEqual(a.raw["end_date"], "2014-07-05")
30 |         self.assertEqual(a.raw, b.raw)
31 | 
32 |     def test_range_months(self):
33 |         """It should support various ways of defining date ranges,
34 |         and these will result in the correct start and end dates."""
35 |         a = self.query.metrics("pageviews").range("2014-07-01", "2014-08-31")
36 |         b = self.query.metrics("pageviews").range("2014-07-01", months=2)
37 | 
38 |         self.assertEqual(a.raw["start_date"], "2014-07-01")
39 |         self.assertEqual(a.raw["end_date"], "2014-08-31")
40 |         self.assertEqual(a.raw, b.raw)
41 | 
42 |     def test_query(self):
43 |         """ It should be able to run a query and return a report. """
44 |         q = self.query.metrics("pageviews").range("2014-07-01", "2014-07-05")
45 |         report = q.get()
46 | 
47 |         self.assertTrue(report.rows)
48 | 
49 |     def test_addressable_metrics(self):
50 |         """ It should support multiple ways of pointing to a column. """
""" 51 | a = self.query.metrics("pageviews") 52 | b = self.query.metrics("Pageviews") 53 | c = self.query.metrics("ga:pageviews") 54 | d = self.query.metrics(self.profile.core.columns["pageviews"]) 55 | 56 | self.assertEqual(a.raw, b.raw) 57 | self.assertEqual(b.raw, c.raw) 58 | self.assertEqual(c.raw, d.raw) 59 | 60 | def test_query_immutable(self): 61 | """It should always refine queries by creating a new query and 62 | never modify the original base query.""" 63 | a = self.query.metrics("pageviews") 64 | b = a.range("2014-07-01") 65 | 66 | self.assertNotEqual(a, b) 67 | self.assertNotEqual(a.raw, b.raw) 68 | 69 | def test_granularity(self): 70 | """It should have shortcut functions that make it easier to 71 | define the granularity (hour, day, week, month, year) at which 72 | to query should return results.""" 73 | base = self.query.metrics("pageviews") 74 | a = base.range("2014-07-01", "2014-07-03").get() 75 | b = base.range("2014-07-01", "2014-07-03").interval("day").get() 76 | c = base.daily("2014-07-01", "2014-07-03").get() 77 | 78 | self.assertEqual(len(a), 1) 79 | self.assertEqual(len(b), 3) 80 | self.assertNotEqual(len(a), len(b)) 81 | self.assertEqual(len(b), len(c)) 82 | 83 | def test_step(self): 84 | """ It can limit the amount of results per request. """ 85 | q = ( 86 | self.query.metrics("pageviews") 87 | .range("2014-07-01", "2014-07-05") 88 | .interval("day") 89 | .step(2) 90 | ) 91 | report = q.get() 92 | 93 | self.assertEqual(len(report.queries), 3) 94 | 95 | def test_limit(self): 96 | """ It can limit the total amount of results. """ 97 | base = ( 98 | self.query.metrics("pageviews") 99 | .range("2014-07-01", "2014-07-05") 100 | .interval("day") 101 | ) 102 | full_report = base.get() 103 | limited_report = base.limit(2).get() 104 | 105 | self.assertEqual(len(limited_report.rows), 2) 106 | self.assertEqual(len(limited_report), 2) 107 | self.assertEqual(full_report["pageviews"][:2], limited_report["pageviews"]) 108 | 109 | def test_start_limit(self): 110 | """It can limit the total amount of results as well as the 111 | index at which to start.""" 112 | base = ( 113 | self.query.metrics("pageviews") 114 | .range("2014-07-01", "2014-07-05") 115 | .interval("day") 116 | ) 117 | full_report = base.get() 118 | limited_report = base.limit(2, 2).get() 119 | 120 | self.assertEqual(len(limited_report.rows), 2) 121 | self.assertEqual(len(limited_report), 2) 122 | self.assertEqual(full_report["pageviews"][1:3], limited_report["pageviews"]) 123 | 124 | def test_sort(self): 125 | """ It can ask the Google Analytics API for sorted results. 
""" 126 | q = ( 127 | self.query.metrics("pageviews") 128 | .range("2014-07-01", "2014-07-05") 129 | .interval("day") 130 | ) 131 | 132 | unsorted_report = q.get() 133 | sorted_report = q.sort("pageviews").get() 134 | inverse_sorted_report = q.sort(-self.profile.core.columns["pageviews"]).get() 135 | 136 | self.assertEqual(inverse_sorted_report.queries[0].raw["sort"], "-ga:pageviews") 137 | self.assertEqual( 138 | set(unsorted_report["pageviews"]), 139 | set(sorted_report["pageviews"]), 140 | ) 141 | self.assertEqual( 142 | sorted_report["pageviews"], 143 | inverse_sorted_report["pageviews"][::-1], 144 | ) 145 | 146 | def test_sort_additive(self): 147 | q = ( 148 | self.query.metrics("pageviews") 149 | .sort("pageviews") 150 | .sort("sessions", descending=True) 151 | .build() 152 | ) 153 | self.assertEqual(q["sort"], "ga:pageviews,-ga:sessions") 154 | 155 | def test_segment_simple(self): 156 | """ It should support segmenting data by a segment column. """ 157 | q = self.query.metrics("pageviews").range("2014-07-01") 158 | qs = q.segment("Direct Traffic") 159 | 160 | r = q.get() 161 | rs = qs.get() 162 | 163 | self.assertTrue(rs["pageviews"][0] <= r["pageviews"][0]) 164 | 165 | def test_filter_string(self): 166 | base = ( 167 | self.query.metrics("pageviews") 168 | .dimensions("ga:pagePath") 169 | .range("2014-07-01") 170 | ) 171 | every = base.get() 172 | lt = base.filter("ga:pageviews<10").get() 173 | gt = base.filter("ga:pageviews>10").get() 174 | every = set(every["pagepath"]) 175 | lt = set(lt["pagepath"]) 176 | gt = set(gt["pagepath"]) 177 | 178 | self.assertTrue(lt.issubset(every)) 179 | self.assertTrue(gt.issubset(every)) 180 | self.assertTrue(len(lt.intersection(gt)) == 0) 181 | 182 | def test_filter_keywords(self): 183 | q = self.query.metrics().filter(pageviews__lt=10).build() 184 | self.assertEqual(q["filters"], "ga:pageviews<10") 185 | 186 | def test_filter_additive(self): 187 | q = ( 188 | self.query.metrics("pageviews") 189 | .filter(medium=["cpc", "cpm"]) 190 | .filter(usertype__neq="Returning User") 191 | .build() 192 | ) 193 | self.assertEqual( 194 | q["filters"], "ga:medium==cpc,ga:medium==cpm;ga:userType!=Returning User" 195 | ) 196 | 197 | 198 | if __name__ == "__main__": 199 | unittest.main() 200 | -------------------------------------------------------------------------------- /tests/googleanalytics/report.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | import googleanalytics as ga 4 | import datetime 5 | 6 | from . import base 7 | 8 | 9 | class TestReporting(base.TestCase): 10 | def test_tuples(self): 11 | """ It should parse analytics responses into named tuples with the correct column names. """ 12 | report = ( 13 | self.query.metrics("pageviews", "sessions") 14 | .dimensions("pagepath") 15 | .range("yesterday") 16 | .get() 17 | ) 18 | row = report.rows[0] 19 | self.assertEqual(row._fields, ("page_path", "pageviews", "sessions")) 20 | 21 | def test_shortcuts(self): 22 | """It should have shortcuts to grab the first and last row. 
23 |         It should have shortcuts to grab the first or all values of a one-metric query."""
24 |         report = self.query.metrics("pageviews").range("yesterday").get()
25 |         self.assertEqual(report.first, report.rows[0])
26 |         self.assertEqual(report.last, report.rows[-1])
27 |         self.assertEqual(report.values, [report.rows[0].pageviews])
28 |         self.assertEqual(report.value, report.rows[0].pageviews)
29 | 
30 |     def test_columnwise(self):
31 |         """ It should have the ability to extract a particular column of data. """
32 |         report = (
33 |             self.query.metrics("pageviews").dimensions("pagepath").daily(days=-10).get()
34 |         )
35 |         self.assertEqual(report["pagepath"], [row.page_path for row in report.rows])
36 | 
37 |     def test_serialization(self):
38 |         """ It should serialize rows into dictionaries keyed by column name. """
39 |         serialized = (
40 |             self.query.metrics("pageviews", "sessions").daily(days=-10).serialize()
41 |         )
42 |         for row in serialized:
43 |             self.assertTrue(set(row.keys()) == set(["date", "pageviews", "sessions"]))
44 | 
45 |     def test_cast_numbers(self):
46 |         """It should cast columns that contain numeric data to the
47 |         proper numeric types."""
48 |         q = self.query.metrics("pageviews").daily("2014-07-01", "2014-07-02")
49 |         report = q.get()
50 | 
51 |         for n in report["pageviews"]:
52 |             self.assertIsInstance(n, int)
53 | 
54 |     def test_cast_dates(self):
55 |         """ It should cast columns containing dates to proper date objects. """
56 |         q = self.query.metrics("pageviews").daily("2014-07-01", "2014-07-02")
57 |         report = q.get()
58 | 
59 |         for date in report["date"]:
60 |             self.assertIsInstance(date, datetime.date)
61 | 
--------------------------------------------------------------------------------