├── gtime
│ ├── utils
│ │ ├── __init__.py
│ │ ├── hypothesis
│ │ │ ├── __init__.py
│ │ │ ├── tests
│ │ │ │ ├── __init__.py
│ │ │ │ └── test_general_strategies.py
│ │ │ ├── utils.py
│ │ │ ├── general_strategies.py
│ │ │ └── feature_matrices.py
│ │ ├── testing_constants.py
│ │ ├── trends.py
│ │ └── fixtures.py
│ ├── external
│ │ ├── __init__.py
│ │ └── make_holidays.py
│ ├── causality
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ ├── common.py
│ │ │ ├── test_granger_causality.py
│ │ │ ├── test_pearson_correlation.py
│ │ │ └── test_linear_coefficient.py
│ │ ├── __init__.py
│ │ ├── pearson_correlation.py
│ │ ├── linear_coefficient.py
│ │ └── base.py
│ ├── experimental
│ │ ├── __init__.py
│ │ └── trend_models
│ │   └── function_trend.py
│ ├── metrics
│ │ ├── tests
│ │ │ └── __init__.py
│ │ └── __init__.py
│ ├── plotting
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ ├── test_plotting.py
│ │ │ └── test_preprocessing.py
│ │ └── __init__.py
│ ├── explainability
│ │ ├── tests
│ │ │ └── __init__.py
│ │ └── __init__.py
│ ├── forecasting
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ ├── test_online.py
│ │ │ ├── test_trend.py
│ │ │ └── test_naive.py
│ │ ├── __init__.py
│ │ ├── trend.py
│ │ └── online.py
│ ├── hierarchical
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ ├── test_naive.py
│ │ │ └── test_bottom_up.py
│ │ ├── __init__.py
│ │ ├── base.py
│ │ └── naive.py
│ ├── preprocessing
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ └── utils.py
│ │ ├── time_series_resampling.py
│ │ └── __init__.py
│ ├── regressors
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ ├── test_linear_regressor.py
│ │ │ └── test_explainable.py
│ │ ├── __init__.py
│ │ ├── linear_regressor.py
│ │ ├── explainable.py
│ │ └── multi_output.py
│ ├── feature_extraction
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ ├── test_trend.py
│ │ │ ├── test_sorted_density.py
│ │ │ └── test_crest_factor_detrending.py
│ │ ├── __init__.py
│ │ ├── trend.py
│ │ └── custom.py
│ ├── feature_generation
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ ├── test_calendar.py
│ │ │ └── test_external.py
│ │ └── __init__.py
│ ├── model_selection
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ └── test_splitters.py
│ │ ├── __init__.py
│ │ ├── horizon_shift.py
│ │ └── splitters.py
│ ├── time_series_models
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ ├── test_simple_models.py
│ │ │ └── test_cv_pipeline.py
│ │ ├── __init__.py
│ │ ├── ar.py
│ │ └── simple_models.py
│ ├── compose
│ │ ├── __init__.py
│ │ ├── tests
│ │ │ └── test_feature_creation.py
│ │ └── feature_creation.py
│ ├── __init__.py
│ ├── _version.py
│ └── base.py
├── doc-requirements.txt
├── docs
│ ├── source
│ │ ├── modules
│ │ │ ├── compose.rst
│ │ │ ├── metrics.rst
│ │ │ ├── regressors.rst
│ │ │ ├── causality.rst
│ │ │ ├── forecasting.rst
│ │ │ ├── preprocessing.rst
│ │ │ ├── model_selection.rst
│ │ │ ├── feature_extraction.rst
│ │ │ ├── feature_generation.rst
│ │ │ ├── time_series_models.rst
│ │ │ └── index.rst
│ │ ├── index.rst
│ │ └── conf.py
│ ├── index.html
│ ├── .nojekyll
│ ├── Makefile
│ └── make.bat
├── setup.cfg
├── MANIFEST.in
├── dev-requirements.txt
├── requirements.txt
├── CODE_AUTHORS
├── .pre-commit-config.yaml
├── .coveragerc
├── conftest.py
├── GOVERNANCE.rst
├── .github
│ ├── workflows
│ │ ├── build_and_publish.yml
│ │ ├── deploy_github_pages.yml
│ │ └── ci.yml
│ └── ISSUE_TEMPLATE
│   └── bug_report.md
├── PULL_REQUEST_TEMPLATE.md
├── .gitignore
├── setup.py
├── examples
│ └── hierarchical_model.ipynb
├── CODE_OF_CONDUCT.rst
├── README.md
└── CONTRIBUTING.rst
/gtime/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/external/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/causality/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/experimental/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/metrics/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/plotting/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/explainability/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/forecasting/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/hierarchical/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/preprocessing/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/regressors/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/utils/hypothesis/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/feature_extraction/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/feature_generation/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/model_selection/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/time_series_models/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gtime/utils/hypothesis/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/doc-requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | numpydoc
3 | sphinx_issues
4 | sphinx_rtd_theme
5 |
--------------------------------------------------------------------------------
/docs/source/modules/compose.rst:
--------------------------------------------------------------------------------
1 | Compose
2 | ==================
3 |
4 | .. automodule:: gtime.compose
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/modules/metrics.rst:
--------------------------------------------------------------------------------
1 | Metrics
2 | ==================
3 |
4 | .. automodule:: gtime.metrics
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/docs/source/modules/regressors.rst:
--------------------------------------------------------------------------------
1 | Regressors
2 | ==================
3 |
4 | .. automodule:: gtime.regressors
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/modules/causality.rst:
--------------------------------------------------------------------------------
1 | Causality Tests
2 | ==================
3 |
4 | .. automodule:: gtime.causality
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/modules/forecasting.rst:
--------------------------------------------------------------------------------
1 | Forecasting
2 | ==================
3 |
4 | .. automodule:: gtime.forecasting
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/modules/preprocessing.rst:
--------------------------------------------------------------------------------
1 | Preprocessing
2 | ==================
3 |
4 | .. automodule:: gtime.preprocessing
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/modules/model_selection.rst:
--------------------------------------------------------------------------------
1 | Model Selection
2 | ==================
3 |
4 | .. automodule:: gtime.model_selection
5 | :members:
6 |
--------------------------------------------------------------------------------
/gtime/explainability/__init__.py:
--------------------------------------------------------------------------------
1 | from .explainer import _ShapExplainer, _LimeExplainer
2 |
3 | __all__ = ["_ShapExplainer", "_LimeExplainer"]
4 |
--------------------------------------------------------------------------------
/docs/source/modules/feature_extraction.rst:
--------------------------------------------------------------------------------
1 | Feature Extraction
2 | ==================
3 |
4 | .. automodule:: gtime.feature_extraction
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/modules/feature_generation.rst:
--------------------------------------------------------------------------------
1 | Feature Generation
2 | ==================
3 |
4 | .. automodule:: gtime.feature_generation
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/modules/time_series_models.rst:
--------------------------------------------------------------------------------
1 | Time Series Models
2 | ==================
3 |
4 | .. automodule:: gtime.time_series_models
5 | :members:
6 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 |
4 | [tool:pytest]
5 | addopts =
6 |     --ignore doc
7 |     --ignore gtime/experimental
8 |     -ra
9 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | # Include the README
2 | include README.md
3 |
4 | # Include the license file
5 | include LICENSE
6 |
7 | # Include the requirements file
8 | include requirements.txt
--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | hypothesis==5.5.3
2 | black
3 | pre-commit
4 | pytest
5 | pytest-cov
6 | pytest-xdist
7 | pytest-lazy-fixture
8 | flake8
9 | mypy
10 | nbconvert
11 | jupyter
12 |
--------------------------------------------------------------------------------
/gtime/utils/testing_constants.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | DEFAULT_START = pd.Timestamp("1970-01-01")
4 | DEFAULT_END = pd.Timestamp("2020-01-01")
5 | DEFAULT_FREQ = pd.Timedelta("1D")
6 |
--------------------------------------------------------------------------------
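
Example (illustration, not a repository file): the three constants pin down the
default daily index used by the test helpers, and `pd.date_range` accepts a
Timedelta as `freq`:

    import pandas as pd

    from gtime.utils.testing_constants import DEFAULT_START, DEFAULT_END, DEFAULT_FREQ

    # One entry per day from 1970-01-01 through 2020-01-01.
    index = pd.date_range(start=DEFAULT_START, end=DEFAULT_END, freq=DEFAULT_FREQ)
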
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas>=0.25.3
2 | scipy>=0.17.0
3 | scikit-learn>=0.22.0
4 | matplotlib>=3.1.0
5 | lime>=0.2.0.0
6 | shap>=0.35
7 | holidays>=0.10.2
8 | lunarcalendar>=0.0.9
9 | giotto-tda
10 |
--------------------------------------------------------------------------------
/gtime/plotting/__init__.py:
--------------------------------------------------------------------------------
1 | from .plotting import seasonal_plot, seasonal_subplots, lag_plot, acf_plot
2 |
3 | __all__ = [
4 | "seasonal_plot",
5 | "seasonal_subplots",
6 | "acf_plot",
7 | "lag_plot",
8 | ]
9 |
--------------------------------------------------------------------------------
/gtime/compose/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`gtime.compose` module contains meta-estimators for building composite models
3 | with transformers.
4 | """
5 |
6 | from .feature_creation import FeatureCreation
7 |
8 | __all__ = ["FeatureCreation"]
9 |
--------------------------------------------------------------------------------
/gtime/preprocessing/time_series_resampling.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 |
4 | # FIXME: TBD
5 | class _TimeSeriesResampler:
6 | def __init__(self):
7 | pass
8 |
9 | def transform(self, X: pd.Series):
10 | raise NotImplementedError
11 |
--------------------------------------------------------------------------------
/gtime/utils/trends.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def polynomial(X, weights):
5 | return np.poly1d(weights)(X)
6 |
7 |
8 | def exponential(X, exponent):
9 | return np.exp(X * exponent)
10 |
11 |
12 | TRENDS = {"polynomial": polynomial, "exponential": exponential}
13 |
--------------------------------------------------------------------------------
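
Example (illustration, not a repository file) of calling through the TRENDS
registry; the weight values below are arbitrary:

    import numpy as np

    from gtime.utils.trends import TRENDS

    X = np.arange(5)
    # `polynomial` delegates to np.poly1d, so weights run from the highest
    # degree down: [2.0, 1.0] means 2*x + 1.
    y_poly = TRENDS["polynomial"](X, [2.0, 1.0])
    # `exponential` computes exp(X * exponent) elementwise.
    y_exp = TRENDS["exponential"](X, 0.1)
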
/CODE_AUTHORS:
--------------------------------------------------------------------------------
1 | # The following is the list of the code authors of the giotto-time python
2 | # package. Where component authors are known, add them here.
3 |
4 | Alessio Baccelli a.baccelli@l2f.ch
5 | Stefano Savarè s.savare@l2f.ch
6 | Benjamin Russell b.russell@l2f.ch
7 | Matteo Caorsi m.caorsi@giotto.ai
8 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/psf/black
3 | rev: stable
4 | hooks:
5 | - id: black
6 | language_version: python3.7
7 | - repo: https://github.com/pre-commit/pre-commit-hooks.git
8 | sha: v0.9.5
9 | hooks:
10 | - id: no-commit-to-branch
--------------------------------------------------------------------------------
/gtime/__init__.py:
--------------------------------------------------------------------------------
1 | from gtime._version import __version__
2 |
3 | __all__ = [
4 | "causality",
5 | "compose",
6 | "feature_extraction",
7 | "feature_generation",
8 | "forecasting",
9 | "metrics",
10 | "model_selection",
11 | "preprocessing",
12 | "regressors",
13 | "time_series_models",
14 | "utils",
15 | ]
16 |
--------------------------------------------------------------------------------
/gtime/feature_generation/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`gtime.feature_generation` module deals with the creation of features that do
3 | not depend on the input data, but just on its index.
4 | """
5 |
6 | from .calendar import Calendar
7 | from .external import PeriodicSeasonal, Constant
8 |
9 | __all__ = ["PeriodicSeasonal", "Constant", "Calendar"]
10 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | source = gtime
4 | parallel = True
5 | omit =
6 | **/experimental/*
7 | **/setup.py
8 | **/tests/*
9 |
10 | [report]
11 | exclude_lines =
12 | # Have to re-enable the standard pragma
13 | pragma: no cover
14 |
15 | # Don't complain if tests don't hit defensive assertion code:
16 | raise NotImplementedError
17 |
--------------------------------------------------------------------------------
/gtime/regressors/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`gtime.regressors` module contains regression models.
3 | """
4 |
5 | from .linear_regressor import LinearRegressor
6 | from .multi_output import MultiFeatureMultiOutputRegressor
7 | from .explainable import ExplainableRegressor
8 |
9 | __all__ = [
10 | "LinearRegressor",
11 | "MultiFeatureMultiOutputRegressor",
12 | "ExplainableRegressor",
13 | ]
14 |
--------------------------------------------------------------------------------
/gtime/causality/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`gtime.causality` module deals with the causality tests for time
3 | series data.
4 | """
5 |
6 | from .linear_coefficient import ShiftedLinearCoefficient
7 | from .pearson_correlation import ShiftedPearsonCorrelation
8 | from .granger_causality import GrangerCausality
9 |
10 |
11 | __all__ = ["ShiftedLinearCoefficient", "ShiftedPearsonCorrelation", "GrangerCausality"]
12 |
--------------------------------------------------------------------------------
/gtime/model_selection/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`gtime.model_selection` module deals with model selection.
3 | """
4 |
5 | from .horizon_shift import horizon_shift
6 | from .splitters import FeatureSplitter
7 | from .cross_validation import time_series_split, blocking_time_series_split
8 |
9 | __all__ = [
10 | "FeatureSplitter",
11 | "horizon_shift",
12 | "time_series_split",
13 | "blocking_time_series_split",
14 | ]
15 |
--------------------------------------------------------------------------------
/gtime/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`gtime.preprocessing` module deals with the preprocessing of time series
3 | data.
4 | """
5 |
6 | from .time_series_conversion import (
7 | _SequenceToTimeIndexSeries,
8 | _PandasSeriesToTimeIndexSeries,
9 | _TimeIndexSeriesToPeriodIndexSeries,
10 | )
11 |
12 | from .time_series_preparation import TimeSeriesPreparation
13 |
14 | __all__ = [
15 | "TimeSeriesPreparation",
16 | ]
17 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. giotto documentation master file, created by
2 | sphinx-quickstart on Mon Jun 3 11:56:46 2019.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to giotto-time's API reference!
7 | ========================================
8 |
9 | .. toctree::
10 | :maxdepth: 3
11 | :caption: Contents:
12 |
13 | modules/index
14 |
15 | References
16 | ----------
17 |
18 | * :ref:`genindex`
19 | * :ref:`modindex`
20 |
21 |
--------------------------------------------------------------------------------
/gtime/hierarchical/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`gtime.hierarchical` module contains hierarchical time series models.
3 | """
4 |
5 | from .base import HierarchicalBase
6 | from .naive import HierarchicalNaive
7 | from .bottom_up import HierarchicalBottomUp
8 | from .top_down import HierarchicalTopDown
9 | from .middle_out import HierarchicalMiddleOut
10 |
11 | __all__ = [
12 | "HierarchicalBase",
13 | "HierarchicalNaive",
14 | "HierarchicalBottomUp",
15 | "HierarchicalTopDown",
16 | "HierarchicalMiddleOut",
17 | ]
18 |
--------------------------------------------------------------------------------
/gtime/time_series_models/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`gtime.time_series_models` module contains time series models.
3 | """
4 |
5 | from .base import TimeSeriesForecastingModel
6 | from .ar import AR
7 | from .simple_models import (
8 | Naive,
9 | SeasonalNaive,
10 | Average,
11 | Drift,
12 | )
13 | from .cv_pipeline import CVPipeline
14 |
15 | __all__ = [
16 | "TimeSeriesForecastingModel",
17 | "AR",
18 | "Naive",
19 | "SeasonalNaive",
20 | "Average",
21 | "Drift",
22 | "CVPipeline",
23 | ]
24 |
--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
1 | from datetime import timedelta
2 |
3 | from hypothesis import settings, Verbosity, HealthCheck
4 |
5 | settings.register_profile(
6 | "ci",
7 | max_examples=100,
8 | suppress_health_check=(HealthCheck.too_slow,),
9 | deadline=timedelta(milliseconds=1000),
10 | )
11 | settings.register_profile(
12 | "dev",
13 | max_examples=7,
14 | suppress_health_check=(HealthCheck.too_slow,),
15 | deadline=timedelta(milliseconds=1000),
16 | )
17 | settings.register_profile("debug", max_examples=7, verbosity=Verbosity.verbose)
18 |
--------------------------------------------------------------------------------
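
Example (illustration, not a repository file): the profiles above are only
registered, not activated. One is selected either in code or with the
equivalent `pytest --hypothesis-profile=dev` flag:

    from hypothesis import settings

    # Use the lightweight "dev" profile registered in conftest.py
    # (7 examples per test instead of the CI profile's 100).
    settings.load_profile("dev")
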
/gtime/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`gtime.metrics` module contains a collection of different metrics.
3 | """
4 |
5 | from .metrics import (
6 | non_zero_smape,
7 | smape,
8 | max_error,
9 | mse,
10 | log_mse,
11 | r_square,
12 | mae,
13 | mape,
14 | rmse,
15 | rmsle,
16 | gmae,
17 | )
18 |
19 | __all__ = [
20 | "non_zero_smape",
21 | "smape",
22 | "max_error",
23 | "mse",
24 | "rmse",
25 | "log_mse",
26 | "rmsle",
27 | "r_square",
28 | "mae",
29 | "mape",
30 | "gmae",
31 | ]
32 |
--------------------------------------------------------------------------------
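
Usage sketch (illustration only, assuming these metrics follow the
conventional `metric(y_true, y_pred)` argument order):

    import numpy as np

    from gtime.metrics import rmse, smape

    y_true = np.array([1.0, 2.0, 3.0])
    y_pred = np.array([1.1, 1.9, 3.3])
    # Each metric is expected to reduce the pair of series to a single score.
    print(rmse(y_true, y_pred), smape(y_true, y_pred))
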
/GOVERNANCE.rst:
--------------------------------------------------------------------------------
1 | This file describes the governance of the Giotto Time project.
2 |
3 | Project owner:
4 | --------------
5 |
6 | - L2F SA
7 |
8 | Authors:
9 | --------
10 |
11 | - Please refer to the ``CODE_AUTHORS`` file
12 |
13 | Giotto Time Project Team:
14 | -------------------------
15 |
16 | - Alessio Baccelli a.baccelli@l2f.ch (Developer)
17 | - Stefano Savarè s.savare@l2f.ch (Developer)
18 | - Philippe Nguyen p.nguyen@l2f.ch (Developer)
19 |
20 | Former Project Team Members:
21 | ----------------------------
22 |
23 | - Benjamin Russell b.russell@l2f.ch
24 |
--------------------------------------------------------------------------------
/docs/source/modules/index.rst:
--------------------------------------------------------------------------------
1 | API reference
2 | =============
3 | This page contains a list of the available features in the library.
4 |
5 | .. toctree::
6 | :maxdepth: 3
7 |
8 | causality
9 |
10 | compose
11 |
12 | explainability
13 |
14 | external
15 |
16 | feature_extraction
17 |
18 | feature_generation
19 |
20 | forecasting
21 |
22 | hierarchical
23 |
24 | metrics
25 |
26 | model_selection
27 |
28 | plotting
29 |
30 | preprocessing
31 |
32 | regressors
33 |
34 | time_series_models
35 |
36 | utils
37 |
38 | References
39 | ----------
40 |
41 | * :ref:`genindex`
42 | * :ref:`modindex`
--------------------------------------------------------------------------------
/gtime/_version.py:
--------------------------------------------------------------------------------
1 | """
2 | ``giotto-time`` is a set of python methods to perform time series forecasting
3 | in a machine learning framework.
4 | """
5 | # License: Apache 2.0
6 |
7 | # PEP0440 compatible formatted version, see:
8 | # https://www.python.org/dev/peps/pep-0440/
9 | #
10 | # Generic release markers:
11 | # X.Y
12 | # X.Y.Z # For bugfix releases
13 | #
14 | # Admissible pre-release markers:
15 | # X.YaN # Alpha release
16 | # X.YbN # Beta release
17 | # X.YrcN # Release Candidate
18 | # X.Y # Final release
19 | #
20 | # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
21 | # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
22 | #
23 |
24 | __version__ = "0.2.2"
25 |
--------------------------------------------------------------------------------
/gtime/causality/tests/common.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import pandas as pd
4 |
5 | import pandas.util.testing as testing
6 |
7 |
8 | def make_df_from_expected_shifts(expected_shifts: List[int]) -> pd.DataFrame:
9 | testing.N, testing.K = 500, 1
10 |
11 | df = testing.makeTimeDataFrame(freq="D")
12 | for sh, k in zip(expected_shifts, range(3)):
13 | df[f"shift_{k}"] = df["A"].shift(-sh)
14 | df = df.dropna()
15 |
16 | return df
17 |
18 |
19 | def shift_df_from_expected_shifts(
20 | df: pd.DataFrame, expected_shifts: List[int]
21 | ) -> pd.DataFrame:
22 | for sh, k in zip(expected_shifts, range(3)):
23 | df[f"shift_{k}"] = df[f"shift_{k}"].shift(-sh)
24 | return df.dropna()
25 |
--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
1 | # Compiled python modules.
2 | *.pyc
3 | *.pyo
4 | *.pyd
5 | **/__pycache__
6 |
7 | # Setuptools distribution folder.
8 | /dist/
9 |
10 | # Python egg metadata, regenerated from source files by setuptools.
11 | /*.egg-info
12 | *.so
13 | build
14 |
15 | # Python jupyter notebooks
16 | examples/dask-worker-space
17 | examples/.ipynb_checkpoints
18 |
19 | # Data files
20 | *.pkl
21 | *.csv
22 | *.pqt
23 | data/*
24 |
25 | # Output files
26 | *.out
27 |
28 | # External
29 | **.DS_Store
30 | .idea/*
31 | .vscode/*
32 | *~
33 |
34 | # Unit test
35 | .pytest_cache/
36 | .hypothesis/
37 |
38 | # Pytest output files
39 | test-output.xml
40 |
41 | # Latex
42 | *.aux
43 | *.bbl
44 | *.blg
45 | *.brf
46 | *.log
47 | *.pdf
48 | *.synctex.gz
49 | *.toc
--------------------------------------------------------------------------------
/gtime/forecasting/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`gtime.forecasting` module contains a collection of machine learning models,
3 | for dealing with time series data.
4 | """
5 |
6 | from .gar import GAR, GARFF, MultiFeatureMultiOutputRegressor, MultiFeatureGAR
7 | from .trend import TrendForecaster
8 | from .online import HedgeForecaster
9 | from .naive import (
10 | NaiveForecaster,
11 | SeasonalNaiveForecaster,
12 | DriftForecaster,
13 | AverageForecaster,
14 | )
15 |
16 | __all__ = [
17 | "GAR",
18 | "GARFF",
19 | "MultiFeatureGAR",
20 | "TrendForecaster",
21 | "HedgeForecaster",
22 | "NaiveForecaster",
23 | "SeasonalNaiveForecaster",
24 | "DriftForecaster",
25 | "AverageForecaster",
26 | "MultiFeatureMultiOutputRegressor",
27 | ]
28 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
22 | clean:
23 | rm -rf build/ generated/ reference/generated/
24 |
--------------------------------------------------------------------------------
/gtime/feature_extraction/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`gtime.feature_extraction` module deals with the creation of features
3 | starting from a time series.
4 | """
5 |
6 | from gtime.feature_generation.calendar import Calendar
7 | from .standard import (
8 | Shift,
9 | MovingAverage,
10 | MovingMedian,
11 | Max,
12 | Min,
13 | MovingCustomFunction,
14 | Polynomial,
15 | Exogenous,
16 | CustomFeature,
17 | )
18 | from .custom import SortedDensity, CrestFactorDetrending
19 |
20 | from .trend import Detrender
21 |
22 | __all__ = [
23 | "Shift",
24 | "MovingAverage",
25 | "MovingMedian",
26 | "Max",
27 | "Min",
28 | "MovingCustomFunction",
29 | "Polynomial",
30 | "Exogenous",
31 | "Calendar",
32 | "Detrender",
33 | "CustomFeature",
34 | "SortedDensity",
35 | "CrestFactorDetrending",
36 | ]
37 |
--------------------------------------------------------------------------------
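
Usage sketch (illustration, not a repository file): the transformers follow
the scikit-learn fit/transform convention and, per FeatureMixin, each output
column is suffixed with `__<ClassName>`:

    import pandas as pd

    from gtime.feature_extraction import MovingAverage, Shift

    ts = pd.DataFrame({"A": range(6)}, index=pd.date_range("2020-01-01", periods=6))
    shifted = Shift(1).fit_transform(ts)                       # column "A__Shift"
    smoothed = MovingAverage(window_size=3).fit_transform(ts)  # column "A__MovingAverage"
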
/gtime/forecasting/tests/test_online.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from gtime.forecasting.online import HedgeForecaster
4 |
5 |
6 | def test_hedge_fit_predict():
7 | time_index = pd.date_range("2020-01-01", "2020-01-20")
8 | X_np = np.concatenate(
9 | (np.random.randint(4, size=(20, 2)), np.array([100] * 20).reshape(-1, 1)),
10 | axis=1,
11 | )
12 | X = pd.DataFrame(X_np, index=time_index)
13 | y = pd.DataFrame(
14 | np.random.randint(4, size=(20, 1)), index=time_index, columns=["y_1"]
15 | )
16 | hr = HedgeForecaster(random_state=42)
17 |
18 | preds = hr.fit_predict(X, y)
19 | np.testing.assert_equal(preds.shape, y.shape)
20 | np.testing.assert_almost_equal(hr.weights_[0], hr.weights_[1], decimal=2)
21 | assert hr.weights_[2] < hr.weights_[0]
22 | assert hr.weights_[2] < hr.weights_[1]
23 |
--------------------------------------------------------------------------------
/gtime/compose/tests/test_feature_creation.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | if pd.__version__ >= "1.0.0":
4 | import pandas._testing as testing
5 | else:
6 | import pandas.util.testing as testing
7 | from numpy.testing import assert_array_equal
8 |
9 | from gtime.compose import FeatureCreation
10 | from gtime.feature_extraction import Shift, MovingAverage
11 |
12 |
13 | def test_feature_creation_transform():
14 | data = testing.makeTimeDataFrame(freq="s")
15 |
16 | shift = Shift(1)
17 | ma = MovingAverage(window_size=3)
18 |
19 | col_name = "A"
20 |
21 | fc = FeatureCreation([("s1", shift, [col_name]), ("ma3", ma, [col_name]),])
22 | res = fc.fit(data).transform(data)
23 |
24 | assert_array_equal(
25 | res.columns.values,
26 | [
27 | f"s1__{col_name}__{shift.__class__.__name__}",
28 | f"ma3__{col_name}__{ma.__class__.__name__}",
29 | ],
30 | )
31 |
--------------------------------------------------------------------------------
/gtime/base.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | from sklearn.utils.validation import check_is_fitted
4 |
5 |
6 | def add_class_name(func):
7 | @functools.wraps(func)
8 | def wrapper_add_class_name(*args, **kwargs):
9 | value = func(*args, **kwargs)
10 | return value.add_suffix("__" + args[0].__class__.__name__)
11 |
12 | return wrapper_add_class_name
13 |
14 |
15 | class FeatureMixin:
16 | """Mixin class for all feature extraction estimators in giotto-time."""
17 |
18 | _estimator_type = "feature_extractor"
19 |
20 | def get_feature_names(self):
21 | """Return feature names for output features.
22 |
23 | Returns
24 | -------
25 | output_feature_names : ndarray, shape (n_output_features,)
26 | Array of feature names.
27 |
28 | """
29 | check_is_fitted(self)
30 |
31 | return [f"{name}__{self.__class__.__name__}" for name in self.columns_]
32 |
--------------------------------------------------------------------------------
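
Example (illustration with a hypothetical `Doubler` transformer) of what
`add_class_name` does to output columns:

    import pandas as pd

    from gtime.base import add_class_name

    class Doubler:
        # The decorator takes the DataFrame returned by `transform` and
        # appends "__<ClassName>" of `self` (args[0]) to every column name.
        @add_class_name
        def transform(self, X: pd.DataFrame) -> pd.DataFrame:
            return X * 2

    df = pd.DataFrame({"a": [1, 2]})
    print(Doubler().transform(df).columns)  # Index(['a__Doubler'], dtype='object')
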
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/.github/workflows/build_and_publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on: [workflow_dispatch]
7 |
8 | jobs:
9 | deploy:
10 |
11 | runs-on: ubuntu-latest
12 |
13 | steps:
14 | - uses: actions/checkout@v2
15 | - name: Set up Python
16 | uses: actions/setup-python@v2
17 | with:
18 | python-version: '3.x'
19 | - name: Install dependencies
20 | run: |
21 | python -m pip install --upgrade pip
22 | pip install setuptools wheel twine
23 | - name: Build and publish
24 | env:
25 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
26 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
27 | run: |
28 | python setup.py sdist bdist_wheel
29 | twine check dist/*
30 | twine upload dist/*
--------------------------------------------------------------------------------
/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
5 |
6 | #### Reference Issues/PRs
7 |
13 |
14 |
15 | #### What does this implement/fix? Explain your changes.
16 |
17 |
18 | #### Any other comments?
19 |
20 |
21 |
29 |
--------------------------------------------------------------------------------
/gtime/causality/tests/test_granger_causality.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | import pandas as pd
4 |
5 |
6 | if pd.__version__ >= "1.0.0":
7 | import pandas._testing as testing
8 | else:
9 | import pandas.util.testing as testing
10 | from gtime.causality import GrangerCausality
11 |
12 |
13 | # Expected values from the results of statsmodels
14 | @pytest.mark.parametrize(
15 | "test_input, expected",
16 | [
17 | (["ssr_f"], 0.8420421667509344),
18 | (["ssr_chi2"], 0.8327660223526767),
19 | (["likelihood_chi2"], 0.8341270186135072),
20 | (["zero_f"], 0.8420421667508992),
21 | ],
22 | )
23 | def test_granger_pvalues_ssr_f(test_input, expected):
24 | # Set random seed, otherwise testing creates a new dataframe each time.
25 | np.random.seed(12)
26 |
27 | data = testing.makeTimeDataFrame(freq="s", nper=1000)
28 | granger = (
29 | GrangerCausality(target_col="A", x_col="B", max_shift=10, statistics=test_input)
30 | .fit(data)
31 | .results_[0]
32 | )
33 |
34 | p_value = granger.values[1]
35 | # Not exactly equal, but tested up to 7 decimal places
36 | np.testing.assert_almost_equal(p_value, expected, decimal=7)
37 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | #### Description
4 |
6 |
7 | #### Steps/Code to Reproduce
8 |
12 |
13 | #### Expected Results
14 |
15 |
16 | #### Actual Results
17 |
18 |
19 | #### Versions
20 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/gtime/forecasting/tests/test_trend.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | import pandas.util.testing as testing
5 |
6 | from gtime.forecasting import TrendForecaster
7 |
8 |
9 | def test_polynomial_trend():
10 | testing.N, testing.K = 500, 1
11 | df = testing.makeTimeDataFrame(freq="D")
12 |
13 | df["A"] = df["A"] + 0.0005 * pd.Series(
14 | index=df.index, data=range(df.shape[0])
15 | ) * pd.Series(index=df.index, data=range(df.shape[0]))
16 |
17 | tm = TrendForecaster(trend="polynomial", trend_x0=0.0)
18 | tm.fit(df["A"])
19 | # the exact fitted parameters are too unstable to assert every time
20 | # assert np.allclose(tm.best_trend_params_, [0.0] * len(tm.best_trend_params_))
21 | assert len(tm.best_trend_params_) == 1
22 |
23 |
24 | def test_exponential_trend():
25 | testing.N, testing.K = 500, 1
26 | df = testing.makeTimeDataFrame(freq="D")
27 |
28 | df["A"] = df["A"] + 0.0005 * pd.Series(
29 | index=df.index, data=range(df.shape[0])
30 | ).apply(lambda x: np.exp(0.03 * x))
31 |
32 | tm = TrendForecaster(trend="exponential", trend_x0=4 * [0.0])
33 | tm.fit(df)
34 | # the exact fitted parameters are too unstable to assert every time
35 | # assert np.allclose(tm.best_trend_params_, [0.0] * len(tm.best_trend_params_))
36 | assert len(tm.best_trend_params_) == 4
37 |
38 | # TODO: predicting tests
39 |
--------------------------------------------------------------------------------
/.github/workflows/deploy_github_pages.yml:
--------------------------------------------------------------------------------
1 | # This workflow builds the Sphinx documentation and deploys it to GitHub Pages
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Deploy to gh-pages
5 | on: [workflow_dispatch]
6 | jobs:
7 | build:
8 |
9 | runs-on: ubuntu-latest
10 |
11 | steps:
12 | - uses: actions/checkout@v2
13 | - name: Set up Python 3.8
14 | uses: actions/setup-python@v2
15 | with:
16 | python-version: 3.8
17 | - name: Install dependencies
18 | run: |
19 | python -m pip install --upgrade pip
20 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
21 | if [ -f doc-requirements.txt ]; then pip install -r doc-requirements.txt; fi
22 | - name: Install giotto-time
23 | run: |
24 | pip install -e .
25 | - name: Git checkout and build sphinx docs
26 | run: |
27 | git config --global user.name "github-pages[bot]"
28 | git config --global user.email "41898281+github-pages[bot]@users.noreply.github.com"
29 | git fetch
30 | git checkout gh-pages
31 | git checkout master
32 | cd docs
33 | make html
34 | - name: push to gh-pages
35 | run: |
36 | git symbolic-ref HEAD refs/heads/gh-pages
37 | git reset --mixed gh-pages
38 | git add --all
39 | git add -f docs/build
40 | git commit -m "push sphinx build"
41 | git push origin gh-pages
42 |
--------------------------------------------------------------------------------
/gtime/causality/tests/test_pearson_correlation.py:
--------------------------------------------------------------------------------
1 | from random import randint
2 |
3 | import numpy as np
4 |
5 | from gtime.causality import ShiftedPearsonCorrelation
6 | from gtime.causality.tests.common import make_df_from_expected_shifts
7 |
8 |
9 | def test_pearson_correlation():
10 | expected_shifts = [randint(2, 6) * 2 for _ in range(3)]
11 | df = make_df_from_expected_shifts(expected_shifts)
12 |
13 | spc = ShiftedPearsonCorrelation(target_col="A", max_shift=12)
14 | spc.fit(df)
15 |
16 | shifts = spc.best_shifts_["A"][4:].values
17 | np.testing.assert_array_equal(shifts, expected_shifts)
18 |
19 |
20 | def test_pearson_bootstrap_p_values():
21 | expected_shifts = [randint(2, 9) * 2 for _ in range(3)]
22 | df = make_df_from_expected_shifts(expected_shifts)
23 | shifted_test = ShiftedPearsonCorrelation(
24 | target_col="A", max_shift=5, bootstrap_iterations=500,
25 | )
26 | shifted_test.fit(df)
27 |
28 | pearson_p_values = shifted_test.bootstrap_p_values_
29 | for col_index in range(len(pearson_p_values.columns)):
30 | assert pearson_p_values.iloc[col_index, col_index] == 0
31 |
32 |
33 | def test_pearson_permutation_p_values():
34 | expected_shifts = [randint(2, 9) * 2 for _ in range(3)]
35 | df = make_df_from_expected_shifts(expected_shifts)
36 | shifted_test = ShiftedPearsonCorrelation(
37 | target_col="A", max_shift=5, permutation_iterations=50,
38 | )
39 | shifted_test.fit(df)
40 |
41 | pearson_p_values = shifted_test.permutation_p_values_
42 | for col_index in range(len(pearson_p_values.columns)):
43 | assert pearson_p_values.iloc[col_index, col_index] == 0
44 |
--------------------------------------------------------------------------------
/gtime/hierarchical/base.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | from typing import Any, Dict, Union
3 |
4 | import pandas as pd
5 | from sklearn.base import BaseEstimator, RegressorMixin
6 |
7 |
8 | class HierarchicalBase(BaseEstimator, RegressorMixin):
9 | """ Base class for hierarchical models.
10 |
11 | Parameters
12 | ----------
13 | model : BaseEstimator, required
14 | base model applied to all the time series
15 | hierarchy_tree: Union[str, Dict[str, Any]], optional, default = ``'infer'``
16 | hierarchy structure between the time series. If ``'infer'``, a standard
17 | structure is inferred; the inference logic depends on the subclass.
18 | """
19 |
20 | def __init__(
21 | self, model: BaseEstimator, hierarchy_tree: Union[str, Dict[str, Any]] = "infer"
22 | ):
23 | self.model = model
24 | self.hierarchy_tree = hierarchy_tree
25 |
26 | @abstractmethod
27 | def fit(self, X: Dict[str, pd.DataFrame], y=None):
28 | raise NotImplementedError
29 |
30 | @abstractmethod
31 | def predict(self, X: Dict[str, pd.DataFrame] = None):
32 | raise NotImplementedError
33 |
34 | @staticmethod
35 | def _check_is_dict_of_dataframes_with_str_key(X: Any):
36 | if not isinstance(X, dict):
37 | raise ValueError(
38 | f"X must be a dictionary of pd.DataFrame. Detected: {type(X)}"
39 | )
40 | if not all(isinstance(key, str) for key in X):
41 | raise ValueError("All X keys must be string")
42 | if not all(isinstance(df, pd.DataFrame) for df in X.values()):
43 | raise ValueError("All values of X must be pd.DataFrame")
44 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | notebooks/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *.cover
46 |
47 | # Translations
48 | *.mo
49 | *.pot
50 |
51 | # Django stuff:
52 | *.log
53 |
54 | # Sphinx documentation
55 | docs/_build/
56 | doc/build/
57 | doc/generated/
58 | doc/reference/generated/
59 |
60 | # PyBuilder
61 | target/
62 |
63 | # DotEnv configuration
64 | .env
65 |
66 | # Database
67 | *.db
68 | *.rdb
69 |
70 | # Pycharm
71 | .idea
72 |
73 | # VS Code
74 | .vscode/
75 |
76 | # Spyder
77 | .spyproject/
78 |
79 | # Jupyter NB Checkpoints
80 | .ipynb_checkpoints/
81 | Untitled*
82 |
83 | # exclude data from source control by default
84 | /data/
85 |
86 | # Mac OS-specific storage files
87 | .DS_Store
88 |
89 | # vim
90 | *.swp
91 | *.swo
92 |
93 | # Mypy cache
94 | .mypy_cache/
95 |
96 | # ignore huge time_series_models
97 | models/*.joblib
98 |
99 | # Hypothesis
100 | .hypothesis/
101 |
102 | # PyTest
103 | .pytest_cache/
104 |
105 | # Excel temporary
106 | ~$*.xls*
107 |
--------------------------------------------------------------------------------
/gtime/feature_generation/tests/test_calendar.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pytest
4 | from hypothesis import given, settings
5 |
6 | from gtime.feature_extraction import Calendar
7 | from gtime.utils.hypothesis.time_indexes import giotto_time_series
8 |
9 |
10 | def test_empty_and_non_finite_kernel_error():
11 | with pytest.raises(ValueError):
12 | Calendar(
13 | start_date="ignored",
14 | end_date="ignored",
15 | country="Brazil",
16 | kernel=np.array([]),
17 | )
18 |
19 | with pytest.raises(ValueError):
20 | Calendar(
21 | start_date="ignored",
22 | end_date="ignored",
23 | country="Brazil",
24 | kernel=np.array([np.nan, 1]),
25 | )
26 |
27 |
28 | def test_unevenly_spaced_time_series():
29 | unevenly_spaced_ts = pd.DataFrame(
30 | index=[
31 | pd.Period("2012-01-01"),
32 | pd.Period("2012-01-03"),
33 | pd.Period("2012-01-10"),
34 | ]
35 | )
36 | cal_feature = Calendar(
37 | start_date="ignored",
38 | end_date="ignored",
39 | country="Brazil",
40 | kernel=np.array([0, 1]),
41 | )
42 |
43 | with pytest.raises(ValueError):
44 | cal_feature.fit_transform(unevenly_spaced_ts)
45 |
46 |
47 | @settings(deadline=pd.Timedelta(milliseconds=5000), max_examples=7)
48 | @given(giotto_time_series(min_length=2, max_length=30))
49 | def test_correct_index_random_ts(ts):
50 | cal_feature = Calendar(
51 | start_date="ignored",
52 | end_date="ignored",
53 | country="Brazil",
54 | kernel=np.array([1, 2]),
55 | )
56 | Xt = cal_feature.fit_transform(ts)
57 | np.testing.assert_array_equal(Xt.index, ts.index)
58 |
--------------------------------------------------------------------------------
/gtime/time_series_models/ar.py:
--------------------------------------------------------------------------------
1 | from typing import List, Union, Optional
2 |
3 | import numpy as np
4 | from sklearn.compose import make_column_selector
5 | from sklearn.linear_model import LinearRegression
6 |
7 | from gtime.feature_extraction import Shift
8 | from gtime.forecasting import GAR
9 | from gtime.time_series_models import TimeSeriesForecastingModel
10 |
11 |
12 | class AR(TimeSeriesForecastingModel):
13 | """ Standard AR model for time series
14 |
15 | Parameters
16 | ----------
17 | p: int, required
18 | p parameter in AR
19 | horizon: int, required
20 | how many steps to predict in the future
21 |
22 | Examples
23 | --------
24 | >>> import pandas._testing as testing
25 | >>> from gtime.time_series_models import AR
26 | >>>
27 | >>> testing.N, testing.K = 20, 1
28 | >>> data = testing.makeTimeDataFrame(freq="s")
29 | >>> ar = AR(p=2, horizon=3)
30 | >>>
31 | >>> ar.fit(data)
32 | >>> ar.predict()
33 | y_1 y_2 y_3
34 | 2000-01-01 00:00:17 0.037228 0.163446 -0.237299
35 | 2000-01-01 00:00:18 -0.139627 -0.018082 0.063273
36 | 2000-01-01 00:00:19 -0.107707 0.052031 -0.105526
37 | """
38 |
39 | def __init__(
40 | self,
41 | p: int,
42 | horizon: Union[int, List[int]],
43 | explainer_type: Optional[str] = None,
44 | ):
45 | self.p = p
46 | self.explainer_type = explainer_type
47 | features = [
48 | tuple((f"s{i}", Shift(i), make_column_selector(dtype_include=np.number)))
49 | for i in range(p)
50 | ]
51 | model = GAR(LinearRegression(), explainer_type=explainer_type)
52 | super().__init__(features=features, horizon=horizon, model=model)
53 |
--------------------------------------------------------------------------------
/gtime/model_selection/horizon_shift.py:
--------------------------------------------------------------------------------
1 | from typing import List, Union
2 |
3 | import pandas as pd
4 |
5 | from gtime.feature_extraction import Shift
6 |
7 |
8 | def horizon_shift(
9 | time_series: pd.DataFrame, horizon: Union[int, List[int]] = 5
10 | ) -> pd.DataFrame:
11 | """Perform a shift of the original ``time_series`` for each time step between 1 and
12 | ``horizon``.
13 |
14 | Parameters
15 | ----------
16 | time_series : pd.DataFrame, shape (n_samples, n_features), required
17 | The time series whose values are shifted to build the target matrix.
18 |
19 | horizon : int or list of int, optional, default: ``5``
20 | How far into the future it is necessary to predict. This corresponds
21 | to the number of shifts that are going to be performed on y.
22 |
23 | Returns
24 | -------
25 | y : pd.DataFrame, shape (n_samples, horizon)
26 | The shifted time series.
27 |
28 | Examples
29 | --------
30 | >>> import pandas as pd
31 | >>> from gtime.model_selection import horizon_shift
32 | >>> X = pd.DataFrame(range(0, 5), index=pd.date_range("2020-01-01", "2020-01-05"))
33 | >>> horizon_shift(X, horizon=2)
34 | y_1 y_2
35 | 2020-01-01 1.0 2.0
36 | 2020-01-02 2.0 3.0
37 | 2020-01-03 3.0 4.0
38 | 2020-01-04 4.0 NaN
39 | 2020-01-05 NaN NaN
40 | >>> horizon_shift(X, horizon=[2])
41 | y_2
42 | 2020-01-01 2.0
43 | 2020-01-02 3.0
44 | 2020-01-03 4.0
45 | 2020-01-04 NaN
46 | 2020-01-05 NaN
47 |
48 | """
49 | horizon = range(1, horizon + 1) if isinstance(horizon, (int, float)) else horizon
50 | y = pd.DataFrame(index=time_series.index)
51 | for k in sorted(horizon):
52 | shift_feature = Shift(-k)
53 | y[f"y_{k}"] = shift_feature.fit_transform(time_series)
54 |
55 | return y
56 |
--------------------------------------------------------------------------------
/gtime/utils/hypothesis/utils.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from typing import Union, Tuple
3 |
4 | import hypothesis.strategies as st
5 | import pandas as pd
6 |
7 |
8 | def initialize_start_date_end_date(
9 | start: datetime, end: datetime
10 | ) -> Tuple[datetime, datetime]:
11 | start = start if start is not None else pd.Timestamp("1980-01-01")
12 | end = end if end is not None else pd.Timestamp("2020-01-01")
13 | return start, end
14 |
15 |
16 | def initialize_start_timedelta_end_timedelta(start: pd.Timedelta, end: pd.Timedelta):
17 | start = start if start is not None else pd.Timedelta(0)
18 | end = end if end is not None else pd.Timedelta("40Y")
19 | return start, end
20 |
21 |
22 | def order_pair(element1, element2):
23 | return st.builds(
24 | lambda start, end: (start, end), start=element1, end=element2
25 | ).filter(lambda x: x[0] < x[1])
26 |
27 |
28 | def expected_start_date_from(
29 | end: Union[datetime, pd.Period], periods: int, freq: pd.Timedelta
30 | ) -> Union[datetime, pd.Period]:
31 | return end - periods * freq
32 |
33 |
34 | def expected_end_date_from(
35 | start: Union[datetime, pd.Period], periods: int, freq: pd.Timedelta
36 | ) -> Union[datetime, pd.Period]:
37 | return start + periods * freq
38 |
39 |
40 | def expected_index_length_from(
41 | start: Union[datetime, pd.Period],
42 | end: Union[datetime, pd.Period],
43 | freq: pd.Timedelta,
44 | ) -> int:
45 | expected_index_length = (end - start) // freq
46 | return expected_index_length
47 |
48 |
49 | def freq_to_timedelta(
50 | freq: str, approximate_if_non_uniform: bool = True
51 | ) -> pd.Timedelta:
52 | try:
53 | return pd.to_timedelta(f"1{freq}")
54 | except ValueError as e:
55 | if approximate_if_non_uniform:
56 | correspondences = {
57 | "B": pd.Timedelta(1, unit="D"),
58 | "Q": pd.Timedelta(90, unit="D"),
59 | "A": pd.Timedelta(365, unit="D"),
60 | }
61 | return correspondences[freq]
62 | else:
63 | raise e
64 |
--------------------------------------------------------------------------------
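
Example (illustration, not a repository file) of `freq_to_timedelta`'s two
paths:

    from gtime.utils.hypothesis.utils import freq_to_timedelta

    freq_to_timedelta("D")  # parsed directly: Timedelta('1 days 00:00:00')
    # "B" (business day) has no uniform length, so it falls back to the
    # 1-day approximation in the `correspondences` table.
    freq_to_timedelta("B")  # Timedelta('1 days 00:00:00')
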
/gtime/utils/hypothesis/tests/test_general_strategies.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import pytest
4 | from hypothesis import given
5 | from hypothesis.strategies import integers, data
6 |
7 | from gtime.utils.hypothesis.general_strategies import (
8 | ordered_pair,
9 | shape_matrix,
10 | shape_X_y_matrices,
11 | regressors,
12 | )
13 |
14 |
15 | @given(ordered_pair(0, 10))
16 | def test_ordered_pair(pair: Tuple[int, int]):
17 | assert pair[0] < pair[1]
18 |
19 |
20 | @given(ordered_pair(27, 132))
21 | def test_ordered_pair_values(pair: Tuple[int, int]):
22 | assert pair[0] >= 27
23 | assert pair[1] <= 132
24 |
25 |
26 | @given(data=data(), value=integers(0, 10))
27 | def test_ordered_pair_min_equal_max(data, value):
28 | with pytest.raises(ValueError):
29 | data.draw(ordered_pair(value, value))
30 |
31 |
32 | @given(data=data(), shape_0=ordered_pair(10, 100), shape_1=ordered_pair(1, 8))
33 | def test_shape_X(data, shape_0, shape_1):
34 | shape = data.draw(shape_matrix(*shape_0, *shape_1))
35 | assert shape_0[0] <= shape[0] <= shape_0[1]
36 | assert shape_1[0] <= shape[1] <= shape_1[1]
37 |
38 |
39 | @given(shape_X_y_matrices(123, 243, 12, 34, 1, 6, y_as_vector=False))
40 | def test_shape_X_y_matrices_y_matrix(shape_X_y):
41 | shape_X, shape_y = shape_X_y
42 | assert shape_X[0] == shape_y[0]
43 | assert 12 <= shape_X[1] <= 34
44 | assert 1 <= shape_y[1] <= 6
45 |
46 |
47 | @given(shape_X_y_matrices(123, 243, 12, 34, 1, 6, y_as_vector=True))
48 | def test_shape_X_y_matrices_y_vector(shape_X_y):
49 | shape_X, shape_y = shape_X_y
50 | assert shape_X[0] == shape_y[0]
51 | assert 12 <= shape_X[1] <= 34
52 | assert len(shape_y) == 1
53 |
54 |
55 | @given(shape_X_y_matrices(10, 20, 10, 20, 1, 6))
56 | def test_shape_1_X_smaller_shape_0(shape_X_y):
57 | shape_X, shape_y = shape_X_y
58 | assert shape_X[0] > shape_X[1]
59 |
60 |
61 | @given(data=data())
62 | def test_shape_X_Y_value_error(data):
63 | with pytest.raises(ValueError):
64 | data.draw(shape_X_y_matrices(1, 8, 9, 10, 10, 20))
65 |
66 |
67 | @given(regressors())
68 | def test_regressors(regressor):
69 | assert hasattr(regressor, "fit")
70 | assert hasattr(regressor, "predict")
71 |
--------------------------------------------------------------------------------
/gtime/utils/hypothesis/general_strategies.py:
--------------------------------------------------------------------------------
1 | from hypothesis import assume
2 | from hypothesis.strategies import tuples, integers, floats, sampled_from
3 | import hypothesis.strategies as st
4 | from sklearn.ensemble import (
5 | BaggingRegressor,
6 | AdaBoostRegressor,
7 | GradientBoostingRegressor,
8 | RandomForestRegressor,
9 | )
10 | from sklearn.linear_model import LinearRegression, Ridge, BayesianRidge
11 | from sklearn.tree import ExtraTreeRegressor
12 |
13 |
14 | def ordered_pair(min_value: int, max_value: int):
15 | if min_value == max_value:
16 | raise ValueError("min_value and max_value cannot be the same")
17 | return (
18 | tuples(integers(min_value, max_value), integers(min_value, max_value))
19 | .map(sorted)
20 | .filter(lambda x: x[0] < x[1])
21 | )
22 |
23 |
24 | def shape_matrix(min_shape_0=30, max_shape_0=200, min_shape_1=5, max_shape_1=10):
25 | return tuples(
26 | integers(min_shape_0, max_shape_0), integers(min_shape_1, max_shape_1)
27 | ).filter(lambda x: x[0] > x[1])
28 |
29 |
30 | @st.composite
31 | def shape_X_y_matrices(
32 | draw,
33 | min_shape_0=30,
34 | max_shape_0=200,
35 | min_shape_1_X=5,
36 | max_shape_1_X=10,
37 | min_shape_1_y=1,
38 | max_shape_1_y=3,
39 | y_as_vector=True,
40 | ):
41 | if max_shape_0 <= min_shape_1_X:
42 | raise ValueError(
43 | f"max_shape_0 must be greater than min_shape_1_X: "
44 | f"{max_shape_0}, {min_shape_1_X}"
45 | )
46 | shape_0 = draw(integers(min_shape_0, max_shape_0))
47 | shape_X = draw(shape_matrix(shape_0, shape_0, min_shape_1_X, max_shape_1_X))
48 | if y_as_vector:
49 | shape_y = (shape_0,)
50 | else:
51 | shape_y = draw(shape_matrix(shape_0, shape_0, min_shape_1_y, max_shape_1_y))
52 | assume(shape_X[1] < shape_X[0])
53 | return shape_X, shape_y
54 |
55 |
56 | @st.composite
57 | def regressors(draw):
58 | regressors = [
59 | LinearRegression(),
60 | Ridge(alpha=draw(floats(0.00001, 2))),
61 | BayesianRidge(),
62 | ExtraTreeRegressor(),
63 | GradientBoostingRegressor(),
64 | RandomForestRegressor(),
65 | ]
66 | return draw(sampled_from(regressors))
67 |
--------------------------------------------------------------------------------
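
Sketch (illustration only) of these strategies driving a property-based test:

    import numpy as np
    from hypothesis import given

    from gtime.utils.hypothesis.general_strategies import (
        regressors,
        shape_X_y_matrices,
    )

    @given(regressor=regressors(), shapes=shape_X_y_matrices(y_as_vector=True))
    def test_every_sampled_regressor_fits(regressor, shapes):
        shape_X, shape_y = shapes
        # Constant data is enough here; any sampled regressor must accept it.
        X, y = np.ones(shape_X), np.ones(shape_y)
        regressor.fit(X, y)
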
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: CI
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 | strategy:
17 | matrix:
18 | python-version: ['3.8', '3.9', '3.10']
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 | - name: Set up Python ${{ matrix.python-version }}
23 | uses: actions/setup-python@v2
24 | with:
25 | python-version: ${{ matrix.python-version }}
26 | - name: Install dependencies
27 | run: |
28 | python -m pip install --upgrade pip
29 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
30 | if [ -f dev-requirements.txt ]; then pip install -r dev-requirements.txt; fi
31 | pip install -e .
32 | - name: Lint with flake8
33 | run: |
34 | # stop the build if there are Python syntax errors or undefined names
35 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
37 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
38 | - name: Type checking with mypy
39 | run: |
40 | mypy --ignore-missing-imports . || {
41 |           status=$?; echo "Type checking errors! (exit code: $status)"
42 | }
43 | - name: Test with pytest
44 | continue-on-error: true
45 | run: |
46 | pytest --maxfail=10
47 | - name: Integration tests
48 | run: |
49 | set -e
50 | for n in examples/*.ipynb
51 | do
52 | jupyter nbconvert --to notebook --execute $n
53 | done
54 | - name: Build and install wheels
55 | run: |
56 | set -e
57 | python -m pip install wheel
58 | python setup.py bdist_wheel
59 | python -m pip install dist/*.whl
60 | - name: Upload artifacts
61 | uses: actions/upload-artifact@v2
62 | with:
63 | name: pip_wheel_${{ matrix.python-version }}
64 | path: dist
65 |
--------------------------------------------------------------------------------
/gtime/utils/fixtures.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import pytest
4 | from pytest import fixture
5 | import numpy as np
6 | from sklearn.compose import make_column_selector
7 | from sklearn.linear_model import LinearRegression, Ridge
8 |
9 | from gtime.feature_extraction import Shift, MovingAverage
10 | from gtime.forecasting import GAR
11 | from gtime.time_series_models import TimeSeriesForecastingModel
12 |
13 |
14 | @fixture(scope="function")
15 | def features1():
16 | return [
17 | ("shift_0", Shift(0), make_column_selector(dtype_include=np.number)),
18 | ("shift_1", Shift(1), make_column_selector(dtype_include=np.number)),
19 | (
20 | "moving_average_3",
21 | MovingAverage(window_size=3),
22 | make_column_selector(dtype_include=np.number),
23 | ),
24 | ]
25 |
26 |
27 | @fixture(scope="function")
28 | def features2():
29 | return [
30 | ("shift_0", Shift(0), make_column_selector(dtype_include=np.number)),
31 | ("shift_1", Shift(1), make_column_selector(dtype_include=np.number)),
32 | ]
33 |
34 |
35 | @fixture(scope="function")
36 | def model1():
37 | lr = LinearRegression()
38 | return GAR(lr)
39 |
40 |
41 | @fixture(scope="function")
42 | def model2():
43 | lr = Ridge(alpha=0.1)
44 | return GAR(lr)
45 |
46 |
47 | @fixture(scope="function")
48 | def time_series_forecasting_model1_no_cache(features1, model1):
49 | return TimeSeriesForecastingModel(
50 | features=features1, horizon=2, model=model1, cache_features=False,
51 | )
52 |
53 |
54 | @fixture(scope="function")
55 | def time_series_forecasting_model1_cache(features1, model1):
56 | return TimeSeriesForecastingModel(
57 | features=features1, horizon=2, model=model1, cache_features=True,
58 | )
59 |
60 |
61 | @pytest.fixture(scope="function")
62 | def estimator():
63 | return LinearRegression()
64 |
65 |
66 | def _single_element_lazy_fixtures(*args):
67 | return [pytest.lazy_fixture(arg.__name__) for arg in args[0]]
68 |
69 |
70 | def lazy_fixtures(*args):
71 | if isinstance(args[0], tuple):
72 | raise NotImplementedError
73 | # return [tuple([pytest.lazy_fixture(arg[0].__name__), *arg[1:]]) for arg in args]
74 | else:
75 | return _single_element_lazy_fixtures(*args)
76 |
--------------------------------------------------------------------------------
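
A sketch of how `lazy_fixtures` is meant to be consumed, assuming the `pytest-lazy-fixture` plugin (which provides `pytest.lazy_fixture`) is installed and that the fixtures above are discoverable by pytest, e.g. re-exported through `conftest.py`:

```python
import pytest

from gtime.utils.fixtures import (  # noqa: F401 -- fixtures must be in scope
    lazy_fixtures,
    time_series_forecasting_model1_cache,
    time_series_forecasting_model1_no_cache,
)


@pytest.mark.parametrize(
    "model",
    lazy_fixtures(
        [time_series_forecasting_model1_no_cache, time_series_forecasting_model1_cache]
    ),
)
def test_models_expose_sklearn_api(model):
    # Each lazy fixture resolves to the fixture's return value at test time.
    assert hasattr(model, "fit") and hasattr(model, "predict")
```
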
/gtime/external/make_holidays.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 |
4 | # This source code is licensed under the MIT license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from __future__ import absolute_import, division, print_function
8 |
9 | import warnings
10 |
11 | import numpy as np
12 | import pandas as pd
13 |
14 | import holidays as hdays_part1
15 |
16 | import gtime.external.hdays as hdays_part2
17 |
18 |
19 | def get_holiday_names(country):
20 |     """Return all possible holiday names of a given country
21 |     Parameters
22 |     ----------
23 |     country: country name
24 |     Returns
25 |     -------
26 |     A set of all possible holiday names of the given country
27 |     """
28 | years = np.arange(1995, 2045)
29 | try:
30 | with warnings.catch_warnings():
31 | warnings.simplefilter("ignore")
32 | holiday_names = getattr(hdays_part2, country)(years=years).values()
33 | except AttributeError:
34 | try:
35 | holiday_names = getattr(hdays_part1, country)(years=years).values()
36 | except AttributeError as e:
37 | raise AttributeError(
38 | "Holidays in {} are not currently supported!".format(country)
39 | ) from e
40 | return set(holiday_names)
41 |
42 |
43 | def make_holidays_df(year_list, country, province=None):
44 |     """Make a dataframe of holidays for given years and a country
45 |     Parameters
46 |     ----------
47 |     year_list: a list of years
48 |     country: country name
49 |     province: province name, optional
50 |     Returns
51 |     -------
52 |     Dataframe with 'ds' and 'holiday' columns, ready for the 'holidays' parameter in Prophet
53 |     """
54 | try:
55 | holidays = getattr(hdays_part2, country)(years=year_list)
56 | except AttributeError:
57 | try:
58 | holidays = getattr(hdays_part1, country)(prov=province, years=year_list)
59 | except AttributeError as e:
60 | raise AttributeError(
61 | "Holidays in {} are not currently supported!".format(country)
62 | ) from e
63 | holidays_df = pd.DataFrame(list(holidays.items()), columns=["ds", "holiday"])
64 | holidays_df.reset_index(inplace=True, drop=True)
65 | holidays_df["ds"] = pd.to_datetime(holidays_df["ds"])
66 | return holidays_df
67 |
--------------------------------------------------------------------------------
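
An illustrative usage sketch for the two helpers above; the exact rows returned depend on the installed `holidays` package:

```python
from gtime.external.make_holidays import get_holiday_names, make_holidays_df

# Every holiday name the backing packages know for the country.
swiss_names = get_holiday_names("Switzerland")

# One row per holiday occurrence: a datetime 'ds' column and a 'holiday' column.
swiss_holidays = make_holidays_df(year_list=[2020, 2021], country="Switzerland")
print(swiss_holidays.head())
```
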
/gtime/regressors/tests/test_linear_regressor.py:
--------------------------------------------------------------------------------
1 | from random import random
2 | from typing import Optional, Tuple
3 | import numpy as np
4 | import pandas as pd
5 | from hypothesis import given, settings
6 | from hypothesis.extra.numpy import arrays
7 | from hypothesis.strategies import floats
8 |
9 | from gtime.regressors import LinearRegressor
10 |
11 |
12 | class TestLinearRegressor:
13 | def test_linear_regressor(self):
14 | train, test = train_test_dataframe()
15 |
16 | predictions = compute_predictions_for_train_test(train, test)
17 | expected = compute_expectation_from_test(test)
18 |
19 | np.testing.assert_array_almost_equal(predictions, expected, decimal=2)
20 |
21 | @settings(deadline=None)
22 | @given(
23 | arrays(
24 | dtype=float,
25 | shape=(100, 1),
26 | elements=floats(allow_nan=False, allow_infinity=False, width=16),
27 | )
28 | )
29 | def test_linear_regressor_random_array(self, random_array):
30 | train, test = train_test_dataframe(random_array)
31 |
32 | predictions = compute_predictions_for_train_test(train, test)
33 | expected = compute_expectation_from_test(test)
34 |
35 | np.testing.assert_array_almost_equal(predictions, expected, decimal=0)
36 |
37 |
38 | def train_test_dataframe(
39 |     random_array: Optional[np.ndarray] = None,
40 | ) -> Tuple[pd.DataFrame, pd.DataFrame]:
41 | random_array = (
42 | random_array if random_array is not None else [random() for _ in range(100)]
43 | )
44 |
45 | a1, a2, b = random() * 10, random() * 100, 2 * (1 - random())
46 |
47 | df = pd.DataFrame()
48 | df["x1"] = list(range(100))
49 | df["x2"] = random_array
50 | df["y"] = [b + a1 * t for t in range(100)]
51 | df["y"] = df["y"] + a2 * df["x2"]
52 |
53 | train = df[:90]
54 | test = df[90:]
55 |
56 | return train, test
57 |
58 |
59 | def compute_predictions_for_train_test(
60 | train: pd.DataFrame, test: pd.DataFrame
61 | ) -> np.ndarray:
62 | lr = LinearRegressor()
63 |
64 | lr.fit(train[["x1", "x2"]], train["y"], x0=[0, 0, 0])
65 |
66 | preds_y = lr.predict(test[["x1", "x2"]])
67 | preds_y = preds_y / np.sum(preds_y)
68 |
69 | return preds_y
70 |
71 |
72 | def compute_expectation_from_test(test: pd.DataFrame) -> np.ndarray:
73 | test_y = test["y"].values
74 | test_y = test_y / np.sum(test_y)
75 | return test_y
76 |
--------------------------------------------------------------------------------
/gtime/causality/tests/test_linear_coefficient.py:
--------------------------------------------------------------------------------
1 | from random import randint
2 |
3 | import numpy as np
4 | import pytest
5 | from hypothesis import given, strategies as st
6 | from pandas.util import testing as testing
7 |
8 | from gtime.causality import ShiftedLinearCoefficient
9 | from gtime.causality.tests.common import make_df_from_expected_shifts
10 |
11 |
12 | def test_linear_coefficient():
13 | expected_shifts = [randint(2, 6) * 2 for _ in range(3)]
14 |
15 | df = make_df_from_expected_shifts(expected_shifts)
16 | slc = ShiftedLinearCoefficient(target_col="A", max_shift=12)
17 | slc.fit(df)
18 |
19 | shifts = slc.best_shifts_["A"][4:].values
20 | np.testing.assert_array_equal(shifts, expected_shifts)
21 |
22 |
23 | # TODO: tests refactor TBD
24 | @given(st.integers(1, 20))
25 | @pytest.mark.skip(reason="TODO: Write proper test, increase hypothesis max duration")
26 | def test_linear_coefficient_hyp(shift):
27 | testing.N, testing.K = 500, 1
28 | df = testing.makeTimeDataFrame(freq="D")
29 | df["shifted"] = df["A"].shift(shift)
30 |
31 | slc = ShiftedLinearCoefficient(target_col="A", max_shift=20)
32 | slc.fit(df).transform(df)
33 |
34 |
35 | def test_linear_bootstrap_p_values():
36 | # This test and the next one just test if the p_values on the diagonal are equal
37 |     # to 0. It is hard to assert much more, since the bootstrapping always
38 |     # gives different results. However, other properties could be tested.
39 | expected_shifts = [randint(2, 4) * 2 for _ in range(3)]
40 | df = make_df_from_expected_shifts(expected_shifts)
41 | shifted_test = ShiftedLinearCoefficient(
42 | target_col="A", max_shift=8, bootstrap_iterations=500,
43 | )
44 | shifted_test.fit(df)
45 |
46 | linear_p_values = shifted_test.bootstrap_p_values_
47 | for col_index in range(len(linear_p_values.columns)):
48 | assert linear_p_values.iloc[col_index, col_index] == 0
49 |
50 |
51 | def test_linear_permutation_p_values():
52 | expected_shifts = [randint(2, 4) * 2 for _ in range(3)]
53 | df = make_df_from_expected_shifts(expected_shifts)
54 | shifted_test = ShiftedLinearCoefficient(
55 | target_col="A", max_shift=8, permutation_iterations=50,
56 | )
57 | shifted_test.fit(df)
58 |
59 | linear_p_values = shifted_test.permutation_p_values_
60 | for col_index in range(len(linear_p_values.columns)):
61 | assert linear_p_values.iloc[col_index, col_index] == 0
62 |
--------------------------------------------------------------------------------
/gtime/feature_extraction/tests/test_trend.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | from gtime.feature_extraction import Detrender
5 |
6 |
7 | def test_polynomial_detrend():
8 | time_index = pd.date_range(start="2020-01-01", end="2020-01-20")
9 | ts = pd.DataFrame(range(0, 20), index=time_index)
10 |
11 | detrend_feature = Detrender(trend="polynomial", trend_x0=np.zeros(3))
12 | feature_name = detrend_feature.__class__.__name__
13 | ts_t = detrend_feature.fit_transform(ts)
14 | expected_ts = pd.DataFrame(
15 | [
16 | 1.22681324e-05,
17 | 8.34525141e-06,
18 | 4.86108426e-06,
19 | 1.81563099e-06,
20 | -7.91108403e-07,
21 | -2.95913392e-06,
22 | -4.68844555e-06,
23 | -5.97904330e-06,
24 | -6.83092717e-06,
25 | -7.24409716e-06,
26 | -7.21855327e-06,
27 | -6.75429551e-06,
28 | -5.85132385e-06,
29 | -4.50963832e-06,
30 | -2.72923891e-06,
31 | -5.10125625e-07,
32 | 2.14770155e-06,
33 | 5.24424260e-06,
34 | 8.77949753e-06,
35 | 1.27534663e-05,
36 | ],
37 | columns=[f"0__{feature_name}"],
38 | index=time_index,
39 | )
40 | pd.testing.assert_frame_equal(ts_t, expected_ts, check_less_precise=3)
41 |
42 |
43 | def test_exponential_detrend():
44 | time_index = pd.date_range(start="2020-01-01", end="2020-01-20")
45 | ts = pd.DataFrame(range(0, 20), index=time_index)
46 |
47 | detrend_feature = Detrender(trend="exponential", trend_x0=0)
48 | feature_name = detrend_feature.__class__.__name__
49 | ts_t = detrend_feature.fit_transform(ts)
50 | expected_ts = pd.DataFrame(
51 | [
52 | -1.0,
53 | -0.18238542,
54 | 0.60196471,
55 | 1.34698345,
56 | 2.04549733,
57 | 2.68902453,
58 | 3.26753629,
59 | 3.76917473,
60 | 4.1799193,
61 | 4.48319226,
62 | 4.65939237,
63 | 4.68534338,
64 | 4.53364205,
65 | 4.17188719,
66 | 3.5617681,
67 | 2.65798675,
68 | 1.40698343,
69 | -0.25457009,
70 | -2.40155216,
71 | -5.1224979,
72 | ],
73 | columns=[f"0__{feature_name}"],
74 | index=time_index,
75 | )
76 | pd.testing.assert_frame_equal(ts_t, expected_ts)
77 |
--------------------------------------------------------------------------------
/gtime/feature_extraction/tests/test_sorted_density.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pandas.util.testing as testing
4 | import pytest
5 |
6 | from gtime.feature_extraction.custom import SortedDensity
7 |
8 |
9 | def get_input_data():
10 | input_data = pd.DataFrame.from_dict({"x_1": [0, 7, 2], "x_2": [2, 10, 4]})
11 | input_data.index = [
12 | pd.Timestamp(2000, 1, 1),
13 | pd.Timestamp(2000, 2, 1),
14 | pd.Timestamp(2000, 3, 1),
15 | ]
16 | return input_data
17 |
18 |
19 | def get_output_causal():
20 | custom_feature = SortedDensity(window_size=2, is_causal=True)
21 | feature_name = custom_feature.__class__.__name__
22 | output_causal = pd.DataFrame.from_dict(
23 | {
24 | f"x_1__{feature_name}": [np.nan, 0.5, 0.6111111111111112],
25 | f"x_2__{feature_name}": [np.nan, 0.5833333333333334, 0.6428571428571429],
26 | }
27 | )
28 | output_causal.index = [
29 | pd.Timestamp(2000, 1, 1),
30 | pd.Timestamp(2000, 2, 1),
31 | pd.Timestamp(2000, 3, 1),
32 | ]
33 | return output_causal
34 |
35 |
36 | def get_output_anticausal():
37 | custom_feature = SortedDensity(window_size=2, is_causal=False)
38 | feature_name = custom_feature.__class__.__name__
39 | output_anticausal = pd.DataFrame.from_dict(
40 | {
41 | f"x_1__{feature_name}": [0.5, 0.6111111111111112],
42 | f"x_2__{feature_name}": [0.5833333333333334, 0.6428571428571429],
43 | }
44 | )
45 | output_anticausal.index = [
46 | pd.Timestamp(2000, 2, 1),
47 | pd.Timestamp(2000, 3, 1),
48 | ]
49 | return output_anticausal
50 |
51 |
52 | input_data = get_input_data()
53 | output_causal = get_output_causal()
54 | output_anticausal = get_output_anticausal()
55 |
56 |
57 | class TestSortedDensity:
58 | @pytest.mark.parametrize("test_input, expected", [(input_data, output_causal)])
59 |     def test_sorted_density_causal(self, test_input, expected):
60 | feature = SortedDensity(window_size=2, is_causal=True)
61 | output = feature.fit_transform(test_input)
62 | testing.assert_frame_equal(output, expected)
63 |
64 | @pytest.mark.parametrize("test_input, expected", [(input_data, output_anticausal)])
65 |     def test_sorted_density_anticausal(self, test_input, expected):
66 | feature = SortedDensity(window_size=2, is_causal=False)
67 | output = feature.fit_transform(test_input)
68 | testing.assert_frame_equal(output, expected)
69 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | """Toolbox for Time Series Analysis."""
3 |
4 | import os
5 | import codecs
6 |
7 | from setuptools import setup, find_packages
8 |
9 | from gtime import __version__
10 |
11 | version_file = os.path.join("gtime", "_version.py")
12 | with open(version_file) as f:
13 | exec(f.read())
14 |
15 | with open("requirements.txt") as f:
16 | requirements = f.read().splitlines()
17 | with open("doc-requirements.txt") as f:
18 | doc_requirements = f.read().splitlines()
19 | with open("dev-requirements.txt") as f:
20 | dev_requirements = f.read().splitlines()
21 |
22 | DISTNAME = "giotto-time"
23 | DESCRIPTION = "Toolbox for Time Series analysis and integration with Machine Learning."
24 | with codecs.open("README.md", encoding="utf-8-sig") as f:
25 | LONG_DESCRIPTION = f.read()
26 | LONG_DESCRIPTION_TYPE = "text/markdown"
27 | MAINTAINER = "Alessio Baccelli"
28 | MAINTAINER_EMAIL = "maintainers@giotto.ai"
29 | URL = "https://github.com/giotto-ai/giotto-time"
30 | LICENSE = "AGPLv3"
31 | DOWNLOAD_URL = "https://github.com/giotto-ai/giotto-time/tarball/v0.0a0"
32 | VERSION = __version__
33 | CLASSIFIERS = [
34 | "Intended Audience :: Information Technology",
35 | "Intended Audience :: Developers",
36 | "License :: OSI Approved",
37 | "Programming Language :: Python",
38 | "Topic :: Software Development",
39 | "Topic :: Scientific/Engineering",
40 | "Operating System :: Microsoft :: Windows",
41 | "Operating System :: POSIX",
42 | "Operating System :: Unix",
43 | "Operating System :: MacOS",
44 | "Programming Language :: Python :: 3.7",
45 | "Programming Language :: Python :: 3.8",
46 | "Programming Language :: Python :: 3.9",
47 | ]
48 | KEYWORDS = (
49 |     "machine learning time series data analysis topology, persistence diagrams"
50 | )
51 | INSTALL_REQUIRES = requirements
52 | EXTRAS_REQUIRE = {
53 | "tests": dev_requirements,
54 | "doc": doc_requirements,
55 | "examples": [],
56 | }
57 |
58 |
59 | setup(
60 | name=DISTNAME,
61 | maintainer=MAINTAINER,
62 | maintainer_email=MAINTAINER_EMAIL,
63 | description=DESCRIPTION,
64 | license=LICENSE,
65 | url=URL,
66 | version=VERSION,
67 | download_url=DOWNLOAD_URL,
68 | long_description=LONG_DESCRIPTION,
69 | long_description_content_type=LONG_DESCRIPTION_TYPE,
70 | zip_safe=False,
71 | classifiers=CLASSIFIERS,
72 | packages=find_packages(),
73 | keywords=KEYWORDS,
74 | install_requires=INSTALL_REQUIRES,
75 | extras_require=EXTRAS_REQUIRE,
76 | )
77 |
--------------------------------------------------------------------------------
/gtime/feature_extraction/tests/test_crest_factor_detrending.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pandas.util.testing as testing
4 | import pytest
5 |
6 | from gtime.feature_extraction.custom import CrestFactorDetrending
7 |
8 |
9 | def get_input_data():
10 | input_data = pd.DataFrame.from_dict({"x_1": [0, 7, 2], "x_2": [2, 10, 4]})
11 | input_data.index = [
12 | pd.Timestamp(2000, 1, 1),
13 | pd.Timestamp(2000, 2, 1),
14 | pd.Timestamp(2000, 3, 1),
15 | ]
16 | return input_data
17 |
18 |
19 | def get_output_causal():
20 | custom_feature = CrestFactorDetrending(window_size=2, is_causal=True)
21 | feature_name = custom_feature.__class__.__name__
22 | output_causal = pd.DataFrame.from_dict(
23 | {
24 | f"x_1__{feature_name}": [np.nan, 1.0, 0.07547169811320754],
25 | f"x_2__{feature_name}": [np.nan, 0.9615384615384616, 0.13793103448275862],
26 | }
27 | )
28 | output_causal.index = [
29 | pd.Timestamp(2000, 1, 1),
30 | pd.Timestamp(2000, 2, 1),
31 | pd.Timestamp(2000, 3, 1),
32 | ]
33 | return output_causal
34 |
35 |
36 | def get_output_anticausal():
37 | custom_feature = CrestFactorDetrending(window_size=2, is_causal=False)
38 | feature_name = custom_feature.__class__.__name__
39 | output_anticausal = pd.DataFrame.from_dict(
40 | {
41 | f"x_1__{feature_name}": [1.0, 0.07547169811320754],
42 | f"x_2__{feature_name}": [0.9615384615384616, 0.13793103448275862],
43 | }
44 | )
45 | output_anticausal.index = [
46 | pd.Timestamp(2000, 2, 1),
47 | pd.Timestamp(2000, 3, 1),
48 | ]
49 | return output_anticausal
50 |
51 |
52 | input_data = get_input_data()
53 | output_causal = get_output_causal()
54 | output_anticausal = get_output_anticausal()
55 |
56 |
57 | class TestCrestFactorDetrending:
58 | @pytest.mark.parametrize("test_input, expected", [(input_data, output_causal)])
59 | def test_crest_factor_detrending_causal(self, test_input, expected):
60 | feature = CrestFactorDetrending(window_size=2, is_causal=True)
61 | output = feature.fit_transform(test_input)
62 | testing.assert_frame_equal(output, expected)
63 |
64 | @pytest.mark.parametrize("test_input, expected", [(input_data, output_anticausal)])
65 | def test_crest_factor_detrending_anticausal(self, test_input, expected):
66 | feature = CrestFactorDetrending(window_size=2, is_causal=False)
67 | output = feature.fit_transform(test_input)
68 | testing.assert_frame_equal(output, expected)
69 |
--------------------------------------------------------------------------------
/gtime/time_series_models/tests/test_simple_models.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import pytest
4 | from pandas.util import testing as testing
5 | from hypothesis import given, note
6 | import hypothesis.strategies as st
7 | from gtime.utils.hypothesis.time_indexes import giotto_time_series
8 |
9 |
10 | from gtime.time_series_models import (
11 | Naive,
12 | SeasonalNaive,
13 | Average,
14 | Drift,
15 | )
16 |
17 |
18 | @st.composite
19 | def forecast_input(draw, max_length):
20 |     length = draw(st.integers(min_value=4, max_value=max_length))
21 |     horizon = draw(st.integers(min_value=1, max_value=length - 1))
22 |     window = draw(st.integers(min_value=1, max_value=length - horizon))
23 |     df = draw(
24 |         giotto_time_series(
25 |             min_length=horizon + window,
26 |             max_length=max_length,
27 | allow_nan=False,
28 | allow_infinity=False,
29 | )
30 | )
31 | return df, horizon, window
32 |
33 |
34 | class TestNaiveForecast:
35 | @given(x=forecast_input(50))
36 | def test_fit_predict(self, x):
37 | df, horizon, _ = x
38 | model = Naive(horizon=horizon)
39 | model.fit(df)
40 | y_pred = model.predict()
41 | assert y_pred.shape == (horizon, horizon)
42 | res = np.broadcast_to(df.iloc[-horizon:], (horizon, horizon))
43 | y_cols = ["y_" + str(x + 1) for x in range(horizon)]
44 | expected_df = pd.DataFrame(res, index=model.X_test_.index, columns=y_cols)
45 | testing.assert_frame_equal(y_pred, expected_df)
46 |
47 |
48 | class TestSeasonalNaiveForecast:
49 | @given(x=forecast_input(50))
50 | def test_fit_predict(self, x):
51 | df, horizon, seasonal_length = x
52 | model = SeasonalNaive(horizon=horizon, seasonal_length=seasonal_length)
53 | model.fit(df)
54 | y_pred = model.predict()
55 | note(y_pred)
56 | assert y_pred.shape[1] == horizon
57 | if seasonal_length < horizon:
58 | assert all(y_pred.iloc[:, 0] == y_pred.iloc[:, seasonal_length])
59 |
60 |
61 | class TestAverageForecast:
62 | @given(x=forecast_input(50))
63 | def test_fit_predict(self, x):
64 | df, horizon, _ = x
65 | model = Average(horizon=horizon)
66 | model.fit(df)
67 | y_pred = model.predict()
68 | note(y_pred)
69 | assert y_pred.shape == (horizon, horizon)
70 | assert pytest.approx(y_pred.diff(axis=1).sum().sum()) == 0
71 | means = [df.mean()] + [df.iloc[:-i].mean() for i in range(1, horizon)]
72 |
73 |
74 | class TestDriftForecast:
75 | @given(x=forecast_input(50))
76 | def test_fit_predict(self, x):
77 | df, horizon, _ = x
78 | model = Drift(horizon=horizon)
79 | model.fit(df)
80 | y_pred = model.predict()
81 | note(y_pred)
82 | assert len(y_pred) == horizon
83 | # assert pytest.approx(y_pred.diff().diff().sum().sum()) == 0
84 |
--------------------------------------------------------------------------------
/gtime/compose/feature_creation.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from sklearn.compose import ColumnTransformer
3 |
4 |
5 | class FeatureCreation(ColumnTransformer):
6 | """Applies transformers to columns of a pandas DataFrame.
7 |
8 | This estimator is a wrapper of sklearn.compose.ColumnTransformer, the only
9 | difference is the output type of fit_transform and transform methods which is a
10 | DataFrame instead of an array.
11 |
12 | """
13 |
14 | def fit_transform(self, X: pd.DataFrame, y: pd.DataFrame = None):
15 | """Fit all transformers, transform the data and concatenate results.
16 |
17 | Parameters
18 | ----------
19 | X : pd.DataFrame, shape (n_samples, n_features), required
20 | Input data, of which specified subsets are used to fit the
21 | transformers.
22 |
23 | y : pd.DataFrame, shape (n_samples, ...), optional, default: ``None``
24 | Targets for supervised learning.
25 |
26 | Examples
27 | --------
28 | >>> import pandas.util.testing as testing
29 | >>> from gtime.compose import FeatureCreation
30 | >>> from gtime.feature_extraction import Shift, MovingAverage
31 | >>> data = testing.makeTimeDataFrame(freq="s")
32 | >>> fc = FeatureCreation([
33 | ... ('s1', Shift(1), ['A']),
34 | ... ('ma3', MovingAverage(window_size=3), ['B']),
35 | ... ])
36 | >>> fc.fit_transform(data).head()
37 | s1__A__Shift ma3__B__MovingAverage
38 | 2000-01-01 00:00:00 NaN NaN
39 | 2000-01-01 00:00:01 0.211403 NaN
40 | 2000-01-01 00:00:02 -0.313854 0.085045
41 | 2000-01-01 00:00:03 0.502018 -0.239269
42 | 2000-01-01 00:00:04 -0.225324 -0.144625
43 |
44 | Returns
45 | -------
46 | X_t_df : pd.DataFrame, shape (n_samples, sum_n_components)
47 | hstack of results of transformers. sum_n_components is the
48 | sum of n_components (output dimension) over transformers.
49 |
50 | """
51 | X_t = super().fit_transform(X, y)
52 | X_t_df = pd.DataFrame(data=X_t, columns=self.get_feature_names(), index=X.index)
53 | return X_t_df
54 |
55 | def transform(self, X: pd.DataFrame):
56 | """Transform X separately by each transformer, concatenate results.
57 |
58 | Parameters
59 | ----------
60 | X : pd.DataFrame, shape (n_samples, n_features), required
61 | The data to be transformed by subset.
62 |
63 | Returns
64 | -------
65 | X_t_df : DataFrame, shape (n_samples, sum_n_components)
66 | hstack of results of transformers. sum_n_components is the
67 | sum of n_components (output dimension) over transformers. If
68 | any result is a sparse matrix, everything will be converted to
69 | sparse matrices.
70 |
71 | """
72 | X_t = super().transform(X)
73 | X_t_df = pd.DataFrame(data=X_t, columns=self.get_feature_names(), index=X.index)
74 | return X_t_df
75 |
--------------------------------------------------------------------------------
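
Since `FeatureCreation` keeps the `ColumnTransformer` contract, the usual fit-then-transform split also works on unseen data; a minimal sketch reusing the toy data helper from the docstring above:

```python
import pandas.util.testing as testing

from gtime.compose import FeatureCreation
from gtime.feature_extraction import Shift

fc = FeatureCreation([("s1", Shift(1), ["A"])])

train = testing.makeTimeDataFrame(freq="s")
new_data = testing.makeTimeDataFrame(freq="s")

fc.fit(train)                   # fit the wrapped transformers on the training frame
X_new = fc.transform(new_data)  # DataFrame output with the original index
assert list(X_new.columns) == ["s1__A__Shift"]
```
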
/gtime/model_selection/tests/test_splitters.py:
--------------------------------------------------------------------------------
1 | import hypothesis.strategies as st
2 | import numpy as np
3 | import pytest
4 | from hypothesis import given, settings, HealthCheck
5 | from sklearn.compose import make_column_selector
6 |
7 | from gtime.compose import FeatureCreation
8 | from gtime.feature_extraction import Shift, MovingAverage
9 | from gtime.model_selection import horizon_shift
10 | from gtime.model_selection.splitters import FeatureSplitter
11 | from gtime.utils.hypothesis.feature_matrices import X_y_matrices
12 |
13 | # TODO: refactor, make hypothesis generator instead of a full pipeline
14 | from gtime.utils.hypothesis.time_indexes import giotto_time_series
15 |
16 | df_transformer = FeatureCreation(
17 | [
18 | ("shift_0", Shift(0), make_column_selector(dtype_include=np.number)),
19 | ("shift_1", Shift(1), make_column_selector(dtype_include=np.number)),
20 | (
21 | "moving_average_3",
22 | MovingAverage(window_size=3),
23 | make_column_selector(dtype_include=np.number),
24 | ),
25 | ]
26 | )
27 |
28 | horizon = 4
29 |
30 |
31 | class TestFeatureSplitter:
32 | def test_constructor(self):
33 | FeatureSplitter()
34 |
35 | @given(st.text().filter(lambda x: x != "any"))
36 | def test_constructor_wrong_parameter(self, drop_na_mode: str):
37 | with pytest.raises(ValueError):
38 | FeatureSplitter(drop_na_mode)
39 |
40 | @settings(suppress_health_check=(HealthCheck.too_slow,))
41 | @given(
42 | X_y_matrices(
43 | horizon=horizon, df_transformer=df_transformer, allow_nan_infinity=False,
44 | )
45 | )
46 | def test_transform(self, X_y):
47 | X, y = X_y
48 | feature_splitter = FeatureSplitter()
49 | X_train, y_train, X_test, y_test = feature_splitter.transform(X, y)
50 |
51 | assert X_train.shape[0] == max(0, X.shape[0] - 2 - horizon)
52 | assert y_train.shape[0] == X_train.shape[0]
53 | assert X_test.shape[0] == min(max(0, X.shape[0] - 2), horizon)
54 | assert y_test.shape[0] == X_test.shape[0]
55 |
56 |
57 | class TestHorizonShift:
58 | @given(
59 | giotto_time_series(min_length=10, allow_infinity=False, allow_nan=False),
60 | st.integers(1, 8),
61 | )
62 | def test_horizon_int(self, time_series, horizon):
63 | y_shifted = horizon_shift(time_series, horizon)
64 | assert y_shifted.shape[1] == horizon
65 |
66 | # Check first line of y_shifted
67 | for i in range(1, horizon + 1):
68 | assert time_series.iloc[i, 0] == y_shifted.iloc[0, i - 1]
69 |
70 | @given(
71 | giotto_time_series(min_length=10, allow_infinity=False, allow_nan=False),
72 | st.sets(elements=st.integers(1, 8), min_size=1, max_size=8),
73 | )
74 | def test_horizon_list(self, time_series, horizon):
75 | horizon = list(sorted(horizon))
76 | y_shifted = horizon_shift(time_series, horizon)
77 | assert y_shifted.shape[1] == len(horizon)
78 |
79 | # Check first line of y_shifted
80 | for i, elem in enumerate(horizon):
81 | assert time_series.iloc[elem, 0] == y_shifted.iloc[0, i]
82 |
--------------------------------------------------------------------------------
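
For reference, the behavior pinned down by `TestHorizonShift`, as a small sketch with illustrative values:

```python
import numpy as np
import pandas as pd

from gtime.model_selection import horizon_shift

ts = pd.DataFrame(
    np.arange(10.0),
    index=pd.period_range("2020-01-01", periods=10),
    columns=["ts"],
)

# An int horizon yields one column per step ahead: y_1 is ts shifted by -1, etc.
y_all = horizon_shift(ts, horizon=3)   # columns y_1, y_2, y_3

# A list of steps keeps only the requested horizons.
y_some = horizon_shift(ts, horizon=[2, 5])
assert y_some.shape[1] == 2
```
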
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # http://www.sphinx-doc.org/en/master/config
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | import sphinx_rtd_theme # noqa
16 |
17 | sys.path.insert(0, os.path.abspath(os.path.join("..", "..")))
18 | # sys.path.insert(0, os.path.abspath("../"))
19 |
20 | # -- Project information -----------------------------------------------------
21 |
22 | project = "giotto-time"
23 | copyright = "2022, L2F"
24 |
25 | # The full version, including alpha/beta/rc tags
26 | from gtime import __version__
27 |
28 | release = __version__
29 |
30 | # -- General configuration ---------------------------------------------------
31 |
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
35 | extensions = [
36 | "sphinx.ext.autodoc",
37 | "sphinx_rtd_theme",
38 | ]
39 |
40 | # this is needed for some reason...
41 | # see https://github.com/numpy/numpydoc/issues/69
42 | # numpydoc_class_members_toctree = False
43 |
44 | # Add any paths that contain templates here, relative to this directory.
45 | templates_path = ["_templates"]
46 |
47 | # generate autosummary even if no references
48 | #autosummary_generate = True
49 |
50 | # The suffix of source filenames.
51 | # source_suffix = ".rst"
52 |
53 | # The encoding of source files.
54 | # source_encoding = 'utf-8'
55 |
56 | # The master toctree document.
57 | # master_doc = "index"
58 |
59 | # List of patterns, relative to source directory, that match files and
60 | # directories to ignore when looking for source files.
61 | # This pattern also affects html_static_path and html_extra_path.
62 | exclude_patterns = []
63 |
64 | # If true, '()' will be appended to :func: etc. cross-reference text.
65 | # add_function_parentheses = False
66 |
67 | # If true, the current module name will be prepended to all description
68 | # unit titles (such as .. function::).
69 | # add_module_names = True
70 |
71 | # If true, sectionauthor and moduleauthor directives will be shown in the
72 | # output. They are ignored by default.
73 | # show_authors = False
74 |
75 | # The name of the Pygments (syntax highlighting) style to use.
76 | # pygments_style = "sphinx"
77 |
78 | # A list of ignored prefixes for module index sorting.
79 | # modindex_common_prefix = []
80 | # -- Options for HTML output -------------------------------------------------
81 |
82 | # The theme to use for HTML and HTML Help pages. See the documentation for
83 | # a list of builtin themes.
84 | #
85 | html_theme = "sphinx_rtd_theme"
86 |
87 | # Add any paths that contain custom static files (such as style sheets) here,
88 | # relative to this directory. They are copied after the builtin static files,
89 | # so a file named "default.css" will overwrite the builtin "default.css".
90 | html_static_path = [] # ['_static']
91 |
--------------------------------------------------------------------------------
/gtime/forecasting/tests/test_naive.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pytest
4 | from pandas.util import testing as testing
5 | from hypothesis import given, note
6 | import hypothesis.strategies as st
7 | from gtime.utils.hypothesis.time_indexes import giotto_time_series
8 | from gtime.model_selection import horizon_shift, FeatureSplitter
9 |
10 | from gtime.forecasting import (
11 | NaiveForecaster,
12 | SeasonalNaiveForecaster,
13 | DriftForecaster,
14 | AverageForecaster,
15 | )
16 |
17 |
18 | @st.composite
19 | def forecast_input(draw, max_length):
20 |     length = draw(st.integers(min_value=2, max_value=max_length))
21 |     horizon = draw(st.integers(min_value=1, max_value=length - 1))
22 |     X = draw(
23 |         giotto_time_series(
24 |             min_length=length,
25 |             max_length=max_length,
26 | allow_nan=False,
27 | allow_infinity=False,
28 | )
29 | )
30 | y = horizon_shift(X, horizon=horizon)
31 | X_train, y_train, X_test, y_test = FeatureSplitter().transform(X, y)
32 | return X_train, y_train, X_test
33 |
34 |
35 | class SimplePipelineTest:
36 | def setup(self, data, Model):
37 | X_train, y_train, X_test = data
38 | self.model = Model
39 | self.model.fit(X_train, y_train)
40 | self.X_test = X_test
41 | self.y_pred = self.model.predict(X_test)
42 |
43 | def test_fit_horizon(self):
44 | assert self.model.horizon_ == len(self.X_test)
45 |
46 | def test_predict_shape(self):
47 | assert self.y_pred.shape == (self.model.horizon_, self.model.horizon_)
48 |
49 |
50 | class TestNaiveModel(SimplePipelineTest):
51 | @given(data=forecast_input(50))
52 | def setup(self, data):
53 | super().setup(data, NaiveForecaster())
54 |
55 | def test_predict_df(self):
56 | horizon = len(self.X_test)
57 | y_cols = ["y_" + str(x + 1) for x in range(len(self.X_test))]
58 | res = np.broadcast_to(self.X_test, (horizon, horizon))
59 | expected_df = pd.DataFrame(res, index=self.X_test.index, columns=y_cols)
60 | testing.assert_frame_equal(self.y_pred, expected_df)
61 |
62 |
63 | class TestSeasonalNaiveModel(SimplePipelineTest):
64 | @given(data=forecast_input(50), season_length=st.data())
65 | def setup(self, data, season_length):
66 | season_length = season_length.draw(
67 | st.integers(min_value=1, max_value=len(data[0]))
68 | )
69 | self.season_length = season_length
70 | super().setup(data, SeasonalNaiveForecaster(seasonal_length=season_length))
71 |
72 | def test_predict_seasonality(self):
73 | if self.season_length < self.model.horizon_:
74 | assert all(
75 | self.y_pred.iloc[:, 0] == self.y_pred.iloc[:, self.season_length]
76 | )
77 |
78 |
79 | class TestDriftModel(SimplePipelineTest):
80 | @given(data=forecast_input(50))
81 | def setup(self, data):
82 | super().setup(data, DriftForecaster())
83 |
84 | def test_predict_drift(self):
85 | pytest.approx(self.y_pred.diff().diff().sum().sum())
86 | # assert pytest.approx(self.y_pred.diff().diff().sum().sum()) == 0
87 |
88 |
89 | class TestAverageModel(SimplePipelineTest):
90 | @given(data=forecast_input(50))
91 | def setup(self, data):
92 | super().setup(data, AverageForecaster())
93 |
94 | def test_predict_difference(self):
95 | assert pytest.approx(self.y_pred.diff(axis=1).sum().sum()) == 0
96 |
--------------------------------------------------------------------------------
/examples/hierarchical_model.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Hierarchical model\n",
8 |     "This example shows how the hierarchical model can be used"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import sys\n",
18 | "sys.path.append('../')"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "import pandas as pd\n",
28 | "import numpy as np\n",
29 | "import matplotlib.pyplot as plt\n",
30 | "import networkx as nx\n",
31 | "%matplotlib inline \n",
32 | "\n",
33 | "from gtime.hierarchical import HierarchicalMiddleOut\n",
34 | "from gtime.hierarchical import HierarchicalTopDown\n",
35 | "from gtime.hierarchical import HierarchicalBottomUp\n",
36 | "import pandas._testing as testing\n",
37 | "from gtime.time_series_models import AR"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "testing.N, testing.K = 20, 1\n",
47 | "\n",
48 | "data1 = testing.makeTimeDataFrame(freq=\"s\")\n",
49 | "data2 = testing.makeTimeDataFrame(freq=\"s\")\n",
50 | "data3 = testing.makeTimeDataFrame(freq=\"s\")\n",
51 | "data4 = testing.makeTimeDataFrame(freq=\"s\")\n",
52 | "data5 = testing.makeTimeDataFrame(freq=\"s\")\n",
53 | "data6 = testing.makeTimeDataFrame(freq=\"s\")\n",
54 | "data = {'data1': data1, 'data2': data2, 'data3' : data3, 'data4' : data4, 'data5' : data5, 'data6' : data6}"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "tree_adj = {'data1' : ['data2','data3'], 'data2': ['data4', 'data5'], 'data3':['data6'], 'data4':[], 'data5':[], 'data6':[]} "
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "stat_model = AR(p=2, horizon=3)\n",
73 | "middle_out_model = HierarchicalMiddleOut(model=stat_model, hierarchy_tree=tree_adj, method='tdsga', level=0)"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "fitting_middle_out = middle_out_model.fit(data)"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {
89 | "scrolled": true
90 | },
91 | "outputs": [],
92 | "source": [
93 | "fitting_middle_out.predict(data)"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {},
100 | "outputs": [],
101 | "source": []
102 | }
103 | ],
104 | "metadata": {
105 | "kernelspec": {
106 | "display_name": "Python 3 (ipykernel)",
107 | "language": "python",
108 | "name": "python3"
109 | },
110 | "language_info": {
111 | "codemirror_mode": {
112 | "name": "ipython",
113 | "version": 3
114 | },
115 | "file_extension": ".py",
116 | "mimetype": "text/x-python",
117 | "name": "python",
118 | "nbconvert_exporter": "python",
119 | "pygments_lexer": "ipython3",
120 | "version": "3.9.13"
121 | }
122 | },
123 | "nbformat": 4,
124 | "nbformat_minor": 4
125 | }
126 |
--------------------------------------------------------------------------------
/gtime/regressors/linear_regressor.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from scipy.optimize import minimize
4 | from sklearn.metrics import mean_squared_error
5 | from sklearn.utils.validation import check_is_fitted
6 |
7 |
8 | class LinearRegressor:
9 | """Implementation of a LinearRegressor that takes a custom loss function.
10 |
11 | Parameters
12 | ----------
13 | loss : Callable, optional, default: ``mean_squared_error``
14 | The loss function to use when fitting the model. The loss function must accept
15 | y_true, y_pred and return a single real number.
16 |
17 | Examples
18 | --------
19 | >>> from gtime.regressors.linear_regressor import LinearRegressor
20 | >>> from gtime.metrics import max_error
21 | >>> import numpy as np
22 | >>> import pandas as pd
23 | >>> X = np.random.random((100, 10))
24 | >>> y = np.random.random(100)
25 | >>> lr = LinearRegressor(loss=max_error)
26 | >>> X_train, y_train = X[:90], y[:90]
27 | >>> X_test, y_test = X[90:], y[90:]
28 | >>> x0 = [0]*11
29 | >>> lr.fit(X_train, y_train, x0=x0)
30 | >>> lr.predict(X_test)
31 | array([0.62987155, 0.46971378, 0.50421395, 0.5543149 , 0.50848151,
32 | 0.54768797, 0.50968854, 0.50500384, 0.58069366, 0.54912972])
33 |
34 | """
35 |
36 | def __init__(self, loss=mean_squared_error):
37 | self.loss = loss
38 |
39 | def fit(self, X: pd.DataFrame, y: pd.DataFrame, **kwargs) -> "LinearRegressor":
40 |     """Fit the linear model on ``X`` and ``y`` with the given loss function. The
41 |     minimization is done via ``scipy.optimize.minimize``; additional keyword
42 |     arguments are forwarded to it. For the available options, please refer to
43 |     the scipy `documentation
44 |     <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_.
45 |
46 | Parameters
47 | ----------
48 | X : pd.DataFrame, shape (n_samples, n_features), required
49 | The X matrix used as features in the fitting procedure.
50 |
51 | y : pd.DataFrame, shape (n_samples, 1), required
52 | The y matrix to use as target values in the fitting procedure.
53 |
54 | kwargs: dict, optional.
55 | Optional arguments to pass to the ``minimize`` function of scipy.
56 |
57 | Returns
58 | -------
59 | self: LinearRegressor
60 | The fitted model.
61 |
62 | """
63 |
64 | if isinstance(X, pd.DataFrame):
65 | X = X.values
66 |
67 | if isinstance(y, pd.DataFrame):
68 | y = y.values
69 |
70 | def prediction_error(model_weights):
71 | predictions = [
72 | model_weights[0] + np.dot(model_weights[1:], row) for row in X
73 | ]
74 | return self.loss(y, predictions)
75 |
76 | res = minimize(prediction_error, **kwargs)
77 |
78 | self.model_weights_ = res["x"]
79 |
80 | return self
81 |
82 | def predict(self, X: pd.DataFrame) -> pd.DataFrame:
83 | """Predict the y values associated to the features ``X``.
84 |
85 | Parameters
86 | ----------
87 | X : pd.DataFrame, shape (n_samples, n_features), required
88 | The features used to predict.
89 |
90 | Returns
91 | -------
92 |         predictions : np.ndarray, shape (n_samples,)
93 |             The predictions of the model.
94 |
95 | """
96 | check_is_fitted(self)
97 |
98 | predictions = self.model_weights_[0] + np.dot(X, self.model_weights_[1:])
99 | return predictions
100 |
--------------------------------------------------------------------------------
/gtime/plotting/tests/test_plotting.py:
--------------------------------------------------------------------------------
1 | from hypothesis import given, settings
2 | import hypothesis.strategies as st
3 | import pytest
4 | import pandas as pd
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | from gtime.utils.hypothesis.time_indexes import giotto_time_series
8 |
9 | from gtime.plotting import lag_plot, acf_plot, seasonal_subplots, seasonal_plot
10 | from gtime.plotting.preprocessing import seasonal_split
11 |
12 |
13 | @pytest.fixture()
14 | def time_series():
15 | idx = pd.period_range(start="2000-01-01", end="2003-01-01")
16 | df = pd.DataFrame(np.random.random((len(idx), 1)), index=idx, columns=["ts"])
17 | return df
18 |
19 |
20 | class TestLagplots:
21 | @pytest.mark.parametrize("lags", [1, 5, [1], [1, 3, 5, 100]])
22 | def test_subplots_number(self, time_series, lags):
23 | ax = lag_plot(time_series, lags=lags)
24 | num_plots = sum(map(lambda x: x.has_data(), ax.flatten()))
25 | if isinstance(lags, int):
26 | expected_num_plots = lags
27 | else:
28 | expected_num_plots = len(lags)
29 | assert num_plots == expected_num_plots
30 | plt.close("all")
31 |
32 | @pytest.mark.parametrize("lags", [1, 5, [1], [1, 3, 5, 100]])
33 | @pytest.mark.parametrize("plots_per_row", [1, 3, 10])
34 | def test_rows_and_cols(self, time_series, lags, plots_per_row):
35 | ax = lag_plot(time_series, lags=lags, plots_per_row=plots_per_row)
36 | if isinstance(lags, int):
37 | lag_length = lags
38 | else:
39 | lag_length = len(lags)
40 | assert ax.shape == (
41 | (lag_length - 1) // plots_per_row + 1,
42 | min(lag_length, plots_per_row),
43 | )
44 | plt.close("all")
45 |
46 |
47 | class TestACFplots:
48 | @pytest.mark.parametrize("maxlags", [1, 5, 100])
49 | @pytest.mark.parametrize("ci", [0.0, 0.05])
50 | @pytest.mark.parametrize("partial", [True, False])
51 | def test_ci_lines(self, time_series, maxlags, ci, partial):
52 | ax = acf_plot(time_series, max_lags=maxlags, ci=ci, partial=partial)
53 | assert len(ax.lines) == 3
54 | plt.close("all")
55 |
56 | @pytest.mark.parametrize("maxlags", [1, 5, 100])
57 | @pytest.mark.parametrize("ci", [0.0, 0.05])
58 | @pytest.mark.parametrize("partial", [True, False])
59 | def test_num_bars(self, time_series, maxlags, ci, partial):
60 | ax = acf_plot(time_series, maxlags, ci, partial)
61 | assert len(ax.containers[0]) == min(len(time_series), maxlags)
62 | plt.close("all")
63 |
64 |
65 | class TestSubplots:
66 | @pytest.mark.parametrize("cycle", ["year", "6M"])
67 | @pytest.mark.parametrize("freq", ["M"])
68 | @pytest.mark.parametrize("box", [True, False])
69 | def test_subplots_number(self, time_series, cycle, freq, box):
70 | ax = seasonal_subplots(time_series, cycle=cycle, freq=freq, box=box)
71 | split = seasonal_split(time_series, cycle=cycle, freq=freq)
72 | assert ax.size == split.shape[0]
73 | plt.close("all")
74 |
75 |
76 | class TestSeasonalPlots:
77 | @pytest.mark.parametrize("cycle", ["year", "6M"])
78 | @pytest.mark.parametrize("freq", ["M", None])
79 | @pytest.mark.parametrize("polar", [True, False])
80 | @pytest.mark.parametrize("new_ax", [True, False])
81 | def test_seasonal_num_lines(self, time_series, cycle, freq, polar, new_ax):
82 | if new_ax:
83 | if polar:
84 | ax = plt.subplot(111, projection="polar")
85 | else:
86 | ax = plt.subplot(111)
87 | else:
88 | ax = None
89 | ax = seasonal_plot(time_series, cycle=cycle, freq=freq, polar=polar, ax=ax)
90 | split = seasonal_split(time_series, cycle=cycle, freq=freq)
91 | assert len(ax.lines) == split.shape[1]
92 | plt.close("all")
93 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.rst:
--------------------------------------------------------------------------------
1 | CONTRIBUTOR CODE OF CONDUCT
2 | ===========================
3 | (Code of Conduct)
4 | -----------------
5 |
6 |
7 | Our Pledge
8 | ----------
9 |
10 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
11 |
12 | Our Standards
13 | -------------
14 |
15 | Examples of behavior that contributes to creating a positive environment include:
16 |
17 | * Using welcoming and inclusive language;
18 | * Being respectful of differing viewpoints and experiences;
19 | * Gracefully accepting constructive criticism;
20 | * Focusing on what is best for the community;
21 | * Showing empathy towards other community members.
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or advances;
26 | * Trolling, insulting/derogatory comments, and personal or political attacks;
27 | * Public or private harassment;
28 | * Publishing others’ private information, such as a physical or electronic address, without explicit permission;
29 | * Other conduct which could reasonably be considered inappropriate in a professional setting.
30 |
31 | Our Responsibilities
32 | --------------------
33 |
34 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
35 |
36 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
37 |
38 | Scope
39 | -----
40 |
41 | This Code of Conduct applies within all Giotto’s project spaces, to all content on , Giotto’s GitHub organization, or any other official Giotto web presence allowing for community interactions, and it also applies when an individual is representing the project or its community in public spaces.
42 |
43 | Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
44 |
45 | Enforcement
46 | -----------
47 |
48 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at . All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. Sanctions may include written warnings, expulsions from the project, project-sponsored spaces, or project forums, or any other sanction which is deemed appropriate. [The project team] is obligated to maintain confidentiality with regard to the reporter of an incident. If the act is ongoing (such as someone engaging in harassment) or involves a threat to anyone's safety (e.g. threats of violence), the project team may issue sanctions without notice. Further details of specific enforcement policies may be posted separately.
49 |
50 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by the project leader.
51 |
52 | Attribution
53 | -----------
54 |
55 | This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at , and includes some aspects of the TensorFlow Code of Conduct, available at
56 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | [](https://github.com/giotto-ai/giotto-time/actions/workflows/deploy_github_pages.yml)
3 | [](https://github.com/giotto-ai/giotto-time/actions/workflows/build_and_publish.yml)
4 | [](https://github.com/giotto-ai/giotto-time/actions/workflows/ci.yml)
5 | [](https://badge.fury.io/py/giotto-time)
6 | [](https://slack.giotto.ai/)
7 |
8 | # giotto-time
9 |
10 | giotto-time is a machine-learning-based time series forecasting toolbox in Python.
11 | It is part of the [Giotto](https://github.com/giotto-ai) collection of open-source projects and aims to provide
12 | feature extraction, analysis, causality testing and forecasting models based on the
13 | [scikit-learn](https://scikit-learn.org/stable/) API.
14 |
15 | ## License
16 |
17 | giotto-time is distributed under the AGPLv3 [license](https://github.com/giotto-ai/giotto-time/blob/master/LICENSE).
18 | If you need a different distribution license, please contact the L2F team at business@l2f.ch.
19 |
20 | ## Documentation
21 |
22 | - API reference (stable release): https://giotto-ai.github.io/giotto-time/
23 |
24 | ## Getting started
25 |
26 | Get started with giotto-time by following the installation steps below.
27 | Simple tutorials and real-world use cases are available as notebooks in the examples folder.
28 |
29 | ## Installation
30 |
31 | ### User installation
32 |
33 | Run this command in your favourite python environment
34 | ```
35 | pip install giotto-time
36 | ```
37 |
38 | ### Developer installation
39 |
40 | Get the latest state of the source code with the command
41 |
42 | ```
43 | git clone https://github.com/giotto-ai/giotto-time.git
44 | cd giotto-time
45 | pip install -e ".[tests, doc]"
46 | ```
47 |
48 | ## Example
49 |
50 | ```python
51 | from gtime import *
52 | from gtime.feature_extraction import *
53 | import pandas as pd
54 | import numpy as np
55 | from sklearn.linear_model import LinearRegression
56 |
57 | # Create random DataFrame with DatetimeIndex
58 | X_dt = pd.DataFrame(np.random.randint(4, size=(20)),
59 | index=pd.date_range("2019-12-20", "2020-01-08"),
60 | columns=['time_series'])
61 |
62 | # Convert the DatetimeIndex to PeriodIndex and create y matrix
63 | X = preprocessing.TimeSeriesPreparation().transform(X_dt)
64 | y = model_selection.horizon_shift(X, horizon=2)
65 |
66 | # Create some features
67 | cal = feature_generation.Calendar(region="europe", country="Switzerland", kernel=np.array([1, 2]))
68 | X_f = compose.FeatureCreation(
69 | [('s_2', Shift(2), ['time_series']),
70 | ('ma_3', MovingAverage(window_size=3), ['time_series']),
71 | ('cal', cal, ['time_series'])]).fit_transform(X)
72 |
73 | # Train/test split
74 | X_train, y_train, X_test, y_test = model_selection.FeatureSplitter().transform(X_f, y)
75 |
76 | # Try sklearn's MultiOutputRegressor as time-series forecasting model
77 | gar = forecasting.GAR(LinearRegression())
78 | gar.fit(X_train, y_train).predict(X_test)
79 |
80 | ```
81 |
82 |
83 | ## Contributing
84 |
85 | We welcome new contributors of all experience levels. The Giotto
86 | community goals are to be helpful, welcoming, and effective. To learn more about
87 | making a contribution to giotto-time, please see the [CONTRIBUTING.rst](https://github.com/giotto-ai/giotto-time/blob/master/CONTRIBUTING.rst)
88 | file.
89 |
90 | ## Links
91 |
92 | - Official source code repo: https://github.com/giotto-ai/giotto-time
93 | - Download releases: https://pypi.org/project/giotto-time/
94 | - Issue tracker: https://github.com/giotto-ai/giotto-time/issues
95 |
96 | ## Community
97 |
98 | Giotto Slack workspace: https://slack.giotto.ai/
99 |
100 | ## Contacts
101 |
102 | maintainers@giotto.ai
103 |
--------------------------------------------------------------------------------
/gtime/experimental/trend_models/function_trend.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from gtime.experimental.trend_models.base import TrendModel
3 | from scipy.optimize import minimize
4 | from sklearn.metrics import mean_squared_error
5 |
6 |
7 | class FunctionTrend(TrendModel):
8 |     """A model for fitting, predicting and removing a custom functional trend
9 | from a time series. The transformed time series created will be trend
10 | stationary with respect to the specific function. To have more details,
11 | you can check this `link `_.
12 |
13 | Parameters
14 | ----------
15 | loss : ``Callable``, optional, (default=``mean_squared_error``).
16 | The loss function to use when fitting the model. The loss function must
17 | accept y_true, y_pred and return a single real number.
18 |
19 | """
20 |
21 | def __init__(self, model_form, loss=mean_squared_error):
22 | self.model_form = model_form
23 | self.loss = loss
24 |
25 | def fit(
26 | self, time_series: pd.DataFrame, x0: list, method: str = "BFGS"
27 | ) -> TrendModel:
28 | """Fit the model on the ``time_series``, with respect to the provided
29 | ``loss`` and using the provided ``method``. In order to see which
30 |         methods are available, please check the scipy `documentation
31 |         <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_.
32 |
33 | Parameters
34 | ----------
35 | time_series : ``pd.DataFrame``, required.
36 | The time series on which to fit the model.
37 |
38 | x0 : ``list``, required.
39 |     The initial guess for the trend parameters, passed to ``scipy.optimize.minimize``.
40 | method : ``str``, optional, (default=``'BFGS'``).
41 | The method to use in order to minimize the loss function.
42 |
43 | Returns
44 | -------
45 | self : ``TrendModel``
46 | The fitted object.
47 |
48 | """
49 |
50 | def prediction_error(model_weights):
51 | predictions = [
52 | self.model_form(t, model_weights)
53 | for t in range(0, time_series.shape[0])
54 | ]
55 | return self.loss(time_series.values, predictions)
56 |
57 | res = minimize(prediction_error, x0, method=method, options={"disp": False})
58 |
59 | self.model_weights_ = res["x"]
60 |
61 | self.t0_ = time_series.index[0]
62 | freq = time_series.index.freq
63 | if freq is not None:
64 | self.period_ = freq
65 | else:
66 | self.period_ = time_series.index[1] - time_series.index[0]
67 |
68 | return self
69 |
70 | def predict(self, t):
71 | """Using the fitted model, predict the values starting from ``X``.
72 |
73 | Parameters
74 | ----------
75 | t : int or float, required.
76 |     The time step at which to evaluate the fitted trend.
77 |
78 | Returns
79 | -------
80 | prediction
81 |     The value of ``model_form`` evaluated at ``t`` with the fitted weights.
82 |
83 | Raises
84 | ------
85 | ``NotFittedError``
86 | Raised if the model is not fitted yet.
87 |
88 | """
89 | # TODO: raise NotFittedError (as documented above) if fit has not been called
90 | return self.model_form(t, self.model_weights_)
91 |
92 | def transform(self, time_series):
93 | """Transform the ``time_series`` by removing the trend.
94 |
95 | Parameters
96 | ----------
97 | time_series : ``pd.DataFrame``, required.
98 | The time series to transform.
99 |
100 | Returns
101 | -------
102 | transformed_time_series : ``pd.DataFrame``
103 | The transformed time series, without the trend.
104 |
105 | """
106 | # TODO: raise NotFittedError if fit has not been called
107 |
108 | ts = (time_series.index - self.t0_) / self.period_
109 |
110 | predictions = pd.Series(
111 | index=time_series.index,
112 | data=[self.model_form(t, self.model_weights_) for t in ts],
113 | )
114 |
115 | return time_series.sub(predictions, axis=0)
116 |
--------------------------------------------------------------------------------
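
A minimal usage sketch for `FunctionTrend` above (an editor's illustration, not from the library docs): it fits a linear `model_form` on synthetic data and removes the trend. The import path mirrors this file's location and assumes the module's own imports resolve.

```python
import numpy as np
import pandas as pd

from gtime.experimental.trend_models.function_trend import FunctionTrend

time_index = pd.date_range("2020-01-01", "2020-01-10")
ts = pd.DataFrame(1.0 + 2.0 * np.arange(10.0), index=time_index)

# model_form maps a time step t and a weight vector w to the trend value
linear_trend = FunctionTrend(model_form=lambda t, w: w[0] + w[1] * t)
linear_trend.fit(ts, x0=[0.0, 0.0])

detrended = linear_trend.transform(ts)  # residuals should be close to zero
```
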
/gtime/forecasting/trend.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from scipy.optimize import minimize
6 | from sklearn.base import BaseEstimator, RegressorMixin
7 | from sklearn.metrics import mean_squared_error
8 | from sklearn.utils.validation import check_is_fitted
9 |
10 | from gtime.utils.trends import TRENDS
11 |
12 |
13 | class TrendForecaster(BaseEstimator, RegressorMixin):
14 | """Trend forecasting model.
15 |
16 | This estimator optimizes a trend function on train data and will forecast using this trend function with optimized
17 | parameters.
18 |
19 | Parameters
20 | ----------
21 | trend : ``"polynomial"`` | ``"exponential"``, required
22 | The kind of trend removal to apply.
23 |
24 | trend_x0 : np.array, required
25 | Initialisation parameters passed to the trend function
26 |
27 | loss : Callable, optional, default: ``mean_squared_error``
28 | Loss function to minimize.
29 |
30 | method : str, optional, default: ``"BFGS"``
31 | Loss function optimisation method
32 |
33 | Examples
34 | --------
35 | >>> import pandas as pd
36 | >>> import numpy as np
37 | >>> from gtime.model_selection import horizon_shift, FeatureSplitter
38 | >>> from gtime.forecasting import TrendForecaster
39 | >>>
40 | >>> X = pd.DataFrame(np.random.random((10, 1)), index=pd.date_range("2020-01-01", "2020-01-10"))
41 | >>> y = horizon_shift(X, horizon=2)
42 | >>> X_train, y_train, X_test, y_test = FeatureSplitter().transform(X, y)
43 | >>>
44 | >>> tf = TrendForecaster(trend='polynomial', trend_x0=np.zeros(2))
45 | >>> tf.fit(X_train).predict(X_test)
46 | array([[0.39703029],
47 | [0.41734957]])
48 |
49 | """
50 |
51 | def __init__(
52 | self,
53 | trend: str,
54 | trend_x0: np.array,
55 | loss: Callable = mean_squared_error,
56 | method: str = "BFGS",
57 | ):
58 | self.trend = trend
59 | self.trend_x0 = trend_x0
60 | self.loss = loss
61 | self.method = method
62 |
63 | def fit(self, X: pd.DataFrame, y=None) -> "TrendForecaster":
64 | """Fit the estimator.
65 |
66 | Parameters
67 | ----------
68 | X : pd.DataFrame, shape (n_samples, n_features), required
69 | Input data.
70 |
71 | y : None
72 | There is no need of a target in a transformer, yet the pipeline API
73 | requires this parameter.
74 |
75 | Returns
76 | -------
77 | self : object
78 | Returns self.
79 |
80 | """
81 |
82 | if self.trend not in TRENDS:
83 | raise ValueError(
84 | "The trend '%s' is not supported. Supported "
85 | "trends are %s." % (self.trend, list(sorted(TRENDS)))
86 | )
88 | self.best_trend_params_ = minimize(
89 | lambda opt: self.loss(
90 | X.values, [TRENDS[self.trend](t, opt) for t in range(0, X.shape[0])]
91 | ),
92 | self.trend_x0,
93 | method=self.method,
94 | options={"disp": False},
95 | )["x"]
96 |
97 | return self
98 |
99 | def predict(self, X: pd.DataFrame) -> pd.DataFrame:
100 | """Using the fitted polynomial, predict the values starting from ``X``.
101 |
102 | Parameters
103 | ----------
104 | X: pd.DataFrame, shape (n_samples, 1), required
105 | The time series on which to predict.
106 |
107 | Returns
108 | -------
109 | predictions : pd.DataFrame, shape (n_samples, 1)
110 | The output predictions.
111 |
112 | Raises
113 | ------
114 | NotFittedError
115 | Raised if the model is not fitted yet.
116 |
117 | """
118 | check_is_fitted(self)
119 |
120 | predictions = TRENDS[self.trend](X.values, self.best_trend_params_)
121 | return predictions
122 |
--------------------------------------------------------------------------------
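
The `loss` and `method` parameters of `TrendForecaster` are documented above but not exercised in its example; here is a hedged sketch (synthetic data, illustrative choices) combining `mean_absolute_error` with scipy's Nelder-Mead optimizer.

```python
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error

from gtime.forecasting import TrendForecaster
from gtime.model_selection import FeatureSplitter, horizon_shift

X = pd.DataFrame(np.arange(20.0), index=pd.date_range("2020-01-01", "2020-01-20"))
y = horizon_shift(X, horizon=2)
X_train, y_train, X_test, y_test = FeatureSplitter().transform(X, y)

# Any sklearn-style loss and any scipy.optimize.minimize method can be used
tf = TrendForecaster(
    trend="polynomial",
    trend_x0=np.zeros(2),
    loss=mean_absolute_error,
    method="Nelder-Mead",
)
predictions = tf.fit(X_train).predict(X_test)
```
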
/gtime/hierarchical/naive.py:
--------------------------------------------------------------------------------
1 | from copy import deepcopy
2 | from typing import Dict
3 |
4 | import pandas as pd
5 | from sklearn.base import BaseEstimator
6 | from sklearn.utils.validation import check_is_fitted
7 |
8 | from gtime.hierarchical.base import HierarchicalBase
9 |
10 |
11 | class HierarchicalNaive(HierarchicalBase):
12 | """ Simplest hierarchical model possible.
13 | It does not perform any aggregation of the results.
14 | Each time series is fitted and predicted independently.
15 |
16 | Parameters
17 | ----------
18 | model: BaseEstimator, required
19 |     time series forecasting model that is applied to each of the time series. A cross validation model can also be passed.
20 | 
21 | Examples
22 | --------
23 | >>> import pandas._testing as testing
24 | >>> from gtime.time_series_models import AR
25 | >>> from gtime.hierarchical import HierarchicalNaive
26 | >>>
27 | >>> testing.N, testing.K = 20, 1
28 | >>> data1 = testing.makeTimeDataFrame(freq="s")
29 | >>> data2 = testing.makeTimeDataFrame(freq="s")
30 | >>> data = {'data1': data1, 'data2': data2}
31 | >>> time_series_model = AR(p=2, horizon=3)
32 | >>>
33 | >>> hierarchical_model = HierarchicalNaive(model=time_series_model)
34 | >>> hierarchical_model.fit(data)
35 | >>> hierarchical_model.predict()
36 | {'data1': y_1 y_2 y_3
37 | 2000-01-01 00:00:17 0.475903 0.834633 0.649467
38 | 2000-01-01 00:00:18 0.644168 0.610287 0.383904
39 | 2000-01-01 00:00:19 0.180920 0.596606 0.696133, 'data2': y_1 y_2 y_3
40 | 2000-01-01 00:00:17 -0.117342 0.006594 -0.638133
41 | 2000-01-01 00:00:18 -0.394193 -0.607146 0.323875
42 | 2000-01-01 00:00:19 -0.381479 0.088210 -0.356775}
43 | """
44 |
45 | def __init__(self, model: BaseEstimator):
46 | super().__init__(model=model, hierarchy_tree="infer")
47 |
48 | def fit(self, X: Dict[str, pd.DataFrame], y: pd.DataFrame = None):
49 | """ Fit method
50 |
51 | Parameters
52 | ----------
53 | X : Dict[str, pd.DataFrame], required
54 | A dictionary of time series. Each is fitted independently
55 | y : pd.DataFrame, optional, default = ``None``
56 | only for compatibility
57 |
58 | Returns
59 | -------
60 | self
61 | """
62 | self._check_is_dict_of_dataframes_with_str_key(X)
63 | self._infer_hierarchy_tree(X)
64 | self._initialize_models(X)
65 | for key, time_series in X.items():
66 | self.models_[key].fit(time_series)
67 | return self
68 |
69 | def predict(self, X: Dict[str, pd.DataFrame] = None):
70 | """ Predict method
71 |
72 | Parameters
73 | ----------
74 | X : Dict[str, pd.DataFrame], optional, default = ``None``
75 | time series to predict. If ``None`` all the fitted time series are predicted.
76 | The keys in ``X`` have to match the ones used to fit.
77 |
78 | Returns
79 | -------
80 | predictions : Dict[str, pd.DataFrame]
81 | """
82 | check_is_fitted(self)
83 | if X is None:
84 | return self._predict_fitted_time_series()
85 | else:
86 | return self._predict_new_time_series(X)
87 |
88 | def _initialize_models(self, X: Dict[str, pd.DataFrame]):
89 | # one independent copy of the model per time series key
90 | self.models_ = {key: deepcopy(self.model) for key in X}
91 |
92 | def _infer_hierarchy_tree(self, X: Dict[str, pd.DataFrame]):
93 | self.hierarchy_tree_ = set(
94 | X.keys()
95 | ) # No need of a proper hierarchy tree for HierarchicalNaive
96 |
97 | def _predict_fitted_time_series(self) -> Dict[str, pd.DataFrame]:
98 | return {key: model.predict() for key, model in self.models_.items()}
99 |
100 | def _predict_new_time_series(self, X: pd.DataFrame) -> Dict[str, pd.DataFrame]:
101 | return {
102 | key: self.models_[key].predict(time_series)
103 | for key, time_series in X.items()
104 | }
105 |
--------------------------------------------------------------------------------
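
As the `predict` docstring above notes, the keys passed at prediction time must match the fitted ones, and a subset is allowed. A short sketch of that subset path (reusing the `AR` setup from the class example):

```python
import pandas._testing as testing

from gtime.hierarchical import HierarchicalNaive
from gtime.time_series_models import AR

testing.N, testing.K = 20, 1
data = {"data1": testing.makeTimeDataFrame(freq="s"),
        "data2": testing.makeTimeDataFrame(freq="s")}

model = HierarchicalNaive(model=AR(p=2, horizon=3))
model.fit(data)

# Predict only one of the fitted series; its key must match a key used in fit
subset_predictions = model.predict({"data1": data["data1"]})
```
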
/gtime/regressors/tests/test_explainable.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import pytest
4 | from hypothesis import given, settings
5 | from sklearn import clone
6 | from sklearn.base import BaseEstimator
7 | from sklearn.cluster import DBSCAN, KMeans, SpectralClustering
8 | from sklearn.decomposition import PCA
9 | from sklearn.exceptions import NotFittedError
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from gtime.explainability import _LimeExplainer, _ShapExplainer
14 | from gtime.forecasting.tests.test_gar import df_transformer
15 | from gtime.model_selection import FeatureSplitter
16 | from gtime.regressors import ExplainableRegressor
17 | from gtime.utils.hypothesis.feature_matrices import (
18 | numpy_X_matrices,
19 | numpy_X_y_matrices,
20 | X_y_matrices,
21 | )
22 | from gtime.utils.hypothesis.general_strategies import regressors
23 | from gtime.utils.hypothesis.time_indexes import samples_from
24 |
25 |
26 | def bad_regressors():
27 | return samples_from([DBSCAN(), SpectralClustering(), PCA(),])
28 |
29 |
30 | @given(bad_regressors())
31 | def test_bad_regressors(bad_regressor):
32 | assert hasattr(bad_regressor, "fit")
33 | assert not hasattr(bad_regressor, "predict")
34 |
35 |
36 | class TestExplainableRegressor:
37 | @pytest.mark.parametrize("explainer_type", ["lime", "shap"])
38 | @given(estimator=regressors())
39 | def test_constructor(self, estimator, explainer_type):
40 | regressor = ExplainableRegressor(estimator, explainer_type)
41 | if explainer_type == "lime":
42 | assert isinstance(regressor.explainer, _LimeExplainer)
43 | elif explainer_type == "shap":
44 | assert isinstance(regressor.explainer, _ShapExplainer)
45 |
46 | @given(estimator=regressors())
47 | def test_constructor_bad_explainer(self, estimator):
48 | with pytest.raises(ValueError):
49 | ExplainableRegressor(estimator, "bad")
50 |
51 | @pytest.mark.parametrize("explainer_type", ["lime", "shap"])
52 | @given(bad_estimator=bad_regressors())
53 | def test_constructor_bad_regressor(self, bad_estimator, explainer_type):
54 | with pytest.raises(TypeError):
55 | ExplainableRegressor(bad_estimator, explainer_type)
56 |
57 | @pytest.mark.parametrize("explainer_type", ["lime", "shap"])
58 | @given(estimator=regressors(), X=numpy_X_matrices())
59 | def test_error_predict_not_fitted(self, estimator, explainer_type, X):
60 | regressor = ExplainableRegressor(estimator, explainer_type)
61 | with pytest.raises(NotFittedError):
62 | regressor.predict(X)
63 |
64 | def _get_fit_attributes(self, estimator: BaseEstimator) -> List[str]:
65 | return [
66 | v for v in vars(estimator) if v.endswith("_") and not v.startswith("__")
67 | ]
68 |
69 | @pytest.mark.parametrize("explainer_type", ["lime", "shap"])
70 | @given(
71 | estimator=regressors(), X_y=numpy_X_y_matrices(min_value=-100, max_value=100)
72 | )
73 | def test_fit_values(self, estimator, explainer_type, X_y):
74 | X, y = X_y
75 | regressor = ExplainableRegressor(estimator, explainer_type)
76 | regressor.fit(X, y)
77 |
78 | cloned_estimator = clone(estimator)
79 | cloned_estimator.fit(X, y)
80 |
81 | estimator_fit_attributes = self._get_fit_attributes(regressor.estimator)
82 | cloned_estimator_fit_attributes = self._get_fit_attributes(cloned_estimator)
83 |
84 | np.testing.assert_array_equal(
85 | estimator_fit_attributes, cloned_estimator_fit_attributes
86 | )
87 |
88 | @settings(deadline=pd.Timedelta(milliseconds=5000), max_examples=7)
89 | @pytest.mark.parametrize("explainer_type", ["lime", "shap"])
90 | @given(
91 | estimator=regressors(), X_y=numpy_X_y_matrices(min_value=-100, max_value=100)
92 | )
93 | def test_predict_values(self, estimator, explainer_type, X_y):
94 | X, y = X_y
95 | X_test = X[:1, :]
96 | regressor = ExplainableRegressor(estimator, explainer_type)
97 | regressor_predictions = regressor.fit(X, y).predict(X_test)
98 |
99 | cloned_estimator = clone(estimator)
100 | estimator_predictions = cloned_estimator.fit(X, y).predict(X_test)
101 |
102 | assert regressor_predictions.shape == estimator_predictions.shape
103 | assert regressor_predictions.shape[0] == len(regressor.explanations_)
104 |
--------------------------------------------------------------------------------
/gtime/regressors/explainable.py:
--------------------------------------------------------------------------------
1 | from typing import Union, List, Tuple
2 |
3 | from sklearn.base import BaseEstimator, RegressorMixin
4 | import numpy as np
5 | from sklearn.utils.validation import check_is_fitted
6 | import pandas as pd
7 |
8 | from gtime.explainability import _LimeExplainer, _ShapExplainer
9 |
10 |
11 | class ExplainableRegressor(BaseEstimator, RegressorMixin):
12 | """ Wraps the most commons scikit-learn regressor to offer a nice to use interface to fit/predict
13 | models and at the same time to explain the predictions.
14 |
15 | Since it follows the fit/predict interface of scikit-learn model it is compatible with
16 | scikit-learn pipelines, etc..
17 |
18 | 2 explainers are available: LIME and SHAP
19 |
20 | You can get the explanation by accessing to `regressor.explainer_.explanations_` after
21 | the predict function,
22 |
23 | Parameters
24 | ----------
25 | estimator: RegressorMixin, required
26 | the scikit-learn model
27 | explainer_type: str, required
28 | 'lime' or 'shap'
29 |
30 | Examples
31 | --------
32 | >>> import numpy as np
33 | >>> from gtime.regressors import ExplainableRegressor
34 | >>> from sklearn.ensemble import RandomForestRegressor
35 | >>> X = np.random.random((30, 5))
36 | >>> y = np.random.random(30)
37 | >>> X_train, y_train = X[:20], y[:20]
38 | >>> X_test, y_test = X[20:], y[20:]
39 | >>>
40 | >>> random_forest = RandomForestRegressor()
41 | >>> explainable_regressor = ExplainableRegressor(random_forest, 'shap')
42 | >>>
43 | >>> explainable_regressor.fit(X_train, y_train, feature_names=['a', 'b', 'c', 'd', 'e'])
44 | >>> explainable_regressor.predict(X_test)
45 | array([0.41323105, 0.40386639, 0.46462663, 0.3795568 , 0.57571486,
46 | 0.37079003, 0.54756082, 0.35160197, 0.30881165, 0.48201442])
47 | >>> explainable_regressor.explainer_.explanations_[0]
48 | {'a': -0.019896434698603117, 'b': 0.029814649814215954, 'c': 0.02447547087613202, 'd': 0.021313815648682066, 'e': -0.10778800140251406}
49 | """
50 |
51 | def __init__(self, estimator: RegressorMixin, explainer_type: str):
52 | self.estimator = self._check_estimator(estimator)
53 | self.explainer_type = explainer_type
54 | self.explainer = self._initialize_explainer()
55 |
56 | def _check_estimator(self, estimator: RegressorMixin) -> RegressorMixin:
57 | if not hasattr(estimator, "fit") or not hasattr(estimator, "predict"):
58 | raise TypeError(f"Estimator not compatible: {estimator}")
59 | return estimator
60 |
61 | def _initialize_explainer(self) -> Union[_LimeExplainer, _ShapExplainer]:
62 | if self.explainer_type == "lime":
63 | return _LimeExplainer()
64 | elif self.explainer_type == "shap":
65 | return _ShapExplainer()
66 | else:
67 | raise ValueError(f"Explainer not available: {self.explainer_type}")
68 |
69 | def fit(
70 | self, X: np.ndarray, y: np.ndarray, feature_names: List[str] = None,
71 | ):
72 | """ Fit function that calls the fit on the estimator and on the explainer.
73 |
74 | Parameters
75 | ----------
76 | X: np.ndarray, required
77 | train matrix
78 | y: np.ndarray, required
79 | train true values
80 | feature_names: List[str], optional, (default=`None`)
81 |     the names of the feature columns of X
82 |
83 | Returns
84 | -------
85 | Fitted `ExplainableRegressor`
86 | """
87 | self.estimator_ = self.estimator.fit(X, y)
88 | self.explainer_ = self.explainer.fit(
89 | self.estimator_, X, feature_names=feature_names
90 | )
91 | return self
92 |
93 | def predict(self, X: np.ndarray):
94 | """ Predict function that call the predict function of the explainer.
95 |
96 | You can access the explanations of the predictions via the
97 | `regressor.explainer_.explanations_` attribute
98 |
99 | Parameters
100 | ----------
101 | X: np.ndarray, required
102 | test matrix
103 |
104 | Returns
105 | -------
106 | predictions: np.ndarray
107 | """
108 | check_is_fitted(self)
109 | predictions = self.explainer_.predict(X)
110 | self.explanations_ = self.explainer_.explanations_
111 | return predictions
112 |
--------------------------------------------------------------------------------
/gtime/causality/pearson_correlation.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from sklearn.base import TransformerMixin, BaseEstimator
3 |
4 | from gtime.causality.base import CausalityMixin
5 |
6 |
7 | class ShiftedPearsonCorrelation(BaseEstimator, TransformerMixin, CausalityMixin):
8 | """Class responsible for assessing the shifted Pearson correlations (PPMCC) between
9 | two or more series. For more info about the test, click
10 | `here `_.
11 |
12 | Parameters
13 | ----------
14 | min_shift : int, optional, default: ``1``
15 | The minimum number of shifts to check for.
16 |
17 | max_shift : int, optional, default: ``10``
18 | The maximum number of shifts to check for.
19 |
20 | target_col : str, optional, default: ``None``
21 | The column to use as the reference (i.e., the column which is not
22 | shifted).
23 |
24 | dropna : bool, optional, default: ``False``
25 | Determines if the NaN values created by shifting are retained or dropped.
26 |
27 | bootstrap_iterations : int, optional, default: ``None``
28 | If not None, compute the p_values of the test by performing bootstrapping of
29 | the original data (sampling with replacement).
30 |
31 | permutation_iterations : int, optional, default: ``None``
32 | If not None, compute the p_values of the test by performing permutations of
33 | the original data.
34 |
35 | Examples
36 | --------
37 | >>> from gtime.causality.pearson_correlation import ShiftedPearsonCorrelation
38 | >>> import pandas.util.testing as testing
39 | >>> data = testing.makeTimeDataFrame(freq="s")
40 | >>> spc = ShiftedPearsonCorrelation(target_col="A")
41 | >>> spc.fit(data)
42 | >>> spc.best_shifts_
43 | y A B C D
44 | x
45 | A 8 9 6 5
46 | B 7 4 4 6
47 | C 3 4 9 9
48 | D 7 1 9 1
49 | >>> spc.max_corrs_
50 | y A B C D
51 | x
52 | A 0.383800 0.260627 0.343628 0.360151
53 | B 0.311608 0.307203 0.255969 0.298523
54 | C 0.373613 0.267335 0.211913 0.140034
55 | D 0.496535 0.204770 0.402473 0.310065
56 | """
57 |
58 | def __init__(
59 | self,
60 | min_shift: int = 1,
61 | max_shift: int = 10,
62 | target_col: str = None,
63 | dropna: bool = False,
64 | bootstrap_iterations: int = None,
65 | permutation_iterations: int = None,
66 | ):
67 | super().__init__(
68 | bootstrap_iterations=bootstrap_iterations,
69 | permutation_iterations=permutation_iterations,
70 | )
71 | self.min_shift = min_shift
72 | self.max_shift = max_shift
73 | self.target_col = target_col
74 | self.dropna = dropna
75 |
76 | def fit(self, data: pd.DataFrame) -> "ShiftedPearsonCorrelation":
77 | """Create the dataframe of shifts of each time series which maximize the
78 | Pearson correlation (PPMCC).
79 |
80 | Parameters
81 | ----------
82 | data : pd.DataFrame, shape (n_samples, n_time_series), required
83 | The DataFrame containing the time series on which to compute the shifted
84 | correlations.
85 |
86 | Returns
87 | -------
88 | self : ``ShiftedPearsonCorrelation``
89 |
90 | """
91 | best_shifts = self._compute_best_shifts(data, self._get_max_corr_shift)
92 |
93 | pivot_tables = self._create_pivot_tables(best_shifts)
94 |
95 | self.best_shifts_ = pivot_tables["best_shifts"]
96 | self.max_corrs_ = pivot_tables["max_corrs"]
97 |
98 | if self.bootstrap_iterations:
99 | self.bootstrap_p_values_ = pivot_tables["bootstrap_p_values"]
100 |
101 | if self.permutation_iterations:
102 | self.permutation_p_values_ = pivot_tables["permutation_p_values"]
103 |
104 | return self
105 |
106 | def _get_max_corr_shift(self, data: pd.DataFrame, x, y):
107 | shifts = pd.DataFrame()
108 |
109 | for shift in range(self.min_shift, self.max_shift + 1):
110 | shifts[shift] = data[x].shift(shift)
111 |
112 | shifts = shifts.dropna()
113 | self.shifted_corrs = shifts.corrwith(data[y])
114 |
115 | return self.shifted_corrs.max(), self.shifted_corrs.idxmax()
117 |
--------------------------------------------------------------------------------
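
The `bootstrap_iterations` and `permutation_iterations` options described above enable the p-value attributes set in `fit`; a short sketch (the iteration counts are illustrative):

```python
import pandas.util.testing as testing

from gtime.causality.pearson_correlation import ShiftedPearsonCorrelation

data = testing.makeTimeDataFrame(freq="s")
spc = ShiftedPearsonCorrelation(
    target_col="A", bootstrap_iterations=100, permutation_iterations=100
)
spc.fit(data)

bootstrap_p = spc.bootstrap_p_values_      # p-values via resampling with replacement
permutation_p = spc.permutation_p_values_  # p-values via permutation of the data
```
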
/gtime/model_selection/splitters.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 |
5 | class FeatureSplitter:
6 | """Splits the feature matrices X and y in X_train, y_train, X_test, y_test.
7 |
8 | X and y are the feature matrices obtained from the FeatureCreation class.
9 |
10 | Parameters
11 | ----------
12 | drop_na_mode : str, optional, default: ``'any'``
13 | How to drop the NaN values contained in the ``X`` and ``y`` matrices. Only 'any' is
14 | supported for the moment.
15 |
16 | Examples
17 | --------
18 | >>> import pandas as pd
19 | >>> import numpy as np
20 | >>> from gtime.model_selection import FeatureSplitter
21 | >>> X = pd.DataFrame.from_dict({"feature_0": [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8],
22 | ... "feature_1": [np.nan, np.nan, 0.5, 1.5, 2.5, 3.5,
23 | ... 4.5, 5.5, 6.5, 7.5, ]
24 | ... })
25 | >>> y = pd.DataFrame.from_dict({"y_0": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
26 | ... "y_1": [1, 2, 3, 4, 5, 6, 7, 8, 9, np.nan],
27 | ... "y_2": [2, 3, 4, 5, 6, 7, 8, 9, np.nan, np.nan]
28 | ... })
29 | >>> feature_splitter = FeatureSplitter()
30 | >>> X_train, y_train, X_test, y_test = feature_splitter.transform(X, y)
31 | >>> X_train
32 | feature_0 feature_1
33 | 2 1.0 0.5
34 | 3 2.0 1.5
35 | 4 3.0 2.5
36 | 5 4.0 3.5
37 | 6 5.0 4.5
38 | 7 6.0 5.5
39 | >>> y_train
40 | y_0 y_1 y_2
41 | 2 2 3.0 4.0
42 | 3 3 4.0 5.0
43 | 4 4 5.0 6.0
44 | 5 5 6.0 7.0
45 | 6 6 7.0 8.0
46 | 7 7 8.0 9.0
47 | >>> X_test
48 | feature_0 feature_1
49 | 8 7.0 6.5
50 | 9 8.0 7.5
51 | >>> y_test
52 | y_0 y_1 y_2
53 | 8 8 9.0 NaN
54 | 9 9 NaN NaN
55 |
56 | """
57 |
58 | def __init__(self, drop_na_mode: str = "any"):
59 | if drop_na_mode != "any":
60 | raise ValueError(
61 | f'Only drop_na_mode="any" is supported. Detected: {drop_na_mode}'
62 | )
63 | self.drop_na_mode = drop_na_mode
64 |
65 | def transform(
66 | self, X: pd.DataFrame, y: pd.DataFrame
67 | ) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame):
68 | """Split the feature matrices X and y in X_train, y_train, X_test, y_test.
69 |
70 | ``X`` and ``y`` are the feature matrices obtained from the FeatureCreation
71 | class.
72 |
73 | Parameters
74 | ----------
75 | X : pd.DataFrame, shape (n_samples, n_features), required
76 | The feature matrix.
77 |
78 | y : pd.DataFrame, shape (n_samples, horizon), required
79 | The y matrix.
80 |
81 | Returns
82 | -------
83 | X_train, y_train, X_test, y_test : Tuple[pd.DataFrame, pd.DataFrame,
84 | pd.DataFrame, pd.DataFrame]
85 | The X and y, split between train and test.
86 |
87 | """
88 | X, y = self._drop_X_na(X, y)
89 | X_train, y_train, X_test, y_test = self._split_train_test(X, y)
90 | return X_train, y_train, X_test, y_test
91 |
92 | def _drop_X_na(
93 | self, X: pd.DataFrame, y: pd.DataFrame
94 | ) -> (pd.DataFrame, pd.DataFrame):
95 |
96 | X = X.dropna(axis=0, how=self.drop_na_mode)
97 | y = y.loc[X.index]
98 | return X, y
99 |
100 | def _split_train_test(
101 | self, X: pd.DataFrame, y: pd.DataFrame
102 | ) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame):
103 |
104 | train_indexes, test_indexes = self._get_train_test_indexes_from_y(y)
105 | X_train, y_train = X.loc[train_indexes], y.loc[train_indexes]
106 | X_test, y_test = X.loc[test_indexes], y.loc[test_indexes]
107 | return X_train, y_train, X_test, y_test
108 |
109 | def _get_train_test_indexes_from_y(self, y):
110 | last_train_index = self._last_non_nan_y_index(y)
111 | train_indexes = y.loc[:last_train_index].index if last_train_index else []
112 | test_indexes = y.index.difference(train_indexes)
113 | return train_indexes, test_indexes
114 |
115 | def _last_non_nan_y_index(self, y: pd.DataFrame) -> pd.Period:
116 | y_nan = y.isnull().any(axis=1).replace(True, np.nan)
117 | return y_nan.last_valid_index()
118 |
--------------------------------------------------------------------------------
/gtime/regressors/multi_output.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List
2 |
3 | import numpy as np
4 | from sklearn.base import RegressorMixin
5 | from sklearn.multioutput import (
6 | MultiOutputRegressor,
7 | _MultiOutputEstimator,
8 | _fit_estimator,
9 | )
10 | from sklearn.utils import check_X_y, check_array
11 | from sklearn.utils.validation import check_is_fitted
12 | 
13 | 
16 | class MultiFeatureMultiOutputRegressor(RegressorMixin, _MultiOutputEstimator):
17 | """ Multi target regression with option to choose the features for each target.
18 |
19 | This strategy consists of fitting one regressor per target. It is built over
20 | sklearn.multioutput.MultiOutputRegressor. Compared to it, this class allows choosing
21 | different features for each regressor.
22 |
23 | Parameters
24 | ----------
25 | estimator: RegressorMixin, required
26 | An estimator object implementing fit and predict.
27 |
28 | Examples
29 | --------
30 | >>> import numpy as np
31 | >>> from gtime.regressors import MultiFeatureMultiOutputRegressor
32 | >>> from sklearn.ensemble import RandomForestRegressor
33 | >>> X = np.random.random((30, 5))
34 | >>> y = np.random.random((30, 3))
35 | >>> X_train, y_train = X[:20], y[:20]
36 | >>> X_test, y_test = X[20:], y[20:]
37 | >>>
38 | >>> random_forest = RandomForestRegressor()
39 | >>> regressor = MultiFeatureMultiOutputRegressor(estimator=random_forest)
40 | >>>
41 | >>> target_to_features_dict = {0: [0,1,2], 1: [0,1,3], 2: [0,1,4]}
42 | >>> regressor.fit(X_train, y_train, target_to_features_dict=target_to_features_dict)
43 | >>>
44 | >>> predictions = regressor.predict(X_test)
45 | >>> predictions.shape
46 | (10, 3)
47 |
48 | """
49 |
50 | def __init__(
51 | self,
52 | estimator: RegressorMixin,
53 | target_to_features_dict: Dict[int, List[int]] = None,
54 | ):
55 | super().__init__(estimator=estimator, n_jobs=1)
56 | self.target_to_features_dict = target_to_features_dict
57 |
58 | def fit(self, X: np.ndarray, y: np.ndarray, **kwargs):
59 | """Fit the model.
60 |
61 | Train the models, one for each target variable in y.
62 |
63 | Parameters
64 | ----------
65 | X : np.ndarray, shape (n_samples, n_features), required.
66 | The data.
67 | y : np.ndarray, shape (n_samples, horizon), required.
68 | The matrix containing the target variables.
69 |
70 | Returns
71 | -------
72 | self : object
73 |
74 |
75 | """
76 | target_to_features_dict = kwargs.get(
77 | "target_to_features_dict", self.target_to_features_dict
78 | )
79 | if target_to_features_dict is None:
80 | super().fit(X, y)
81 | self.target_to_features_dict_ = None
82 | return self
83 |
84 | X, y = check_X_y(X, y, multi_output=True, accept_sparse=True)
85 |
86 | if y.ndim == 1:
87 | raise ValueError("y must have at least two dimensions")
88 |
89 | self.estimators_ = [
90 | _fit_estimator(self.estimator, X[:, target_to_features_dict[i]], y[:, i])
91 | for i in range(y.shape[1])
92 | ]
93 | self.target_to_features_dict_ = target_to_features_dict
94 | self.expected_X_shape_ = X.shape[1]
95 | return self
96 |
97 | def predict(self, X: np.ndarray) -> np.ndarray:
98 | """For each row in ``X``, make a prediction for each fitted model
99 |
100 | Parameters
101 | ----------
102 | X : np.ndarray, shape (n_samples, n_features), required
103 | The data.
104 |
105 | Returns
106 | -------
107 | predictions : np.ndarray, shape (n_samples, horizon)
108 | The predictions
109 |
110 | """
111 | check_is_fitted(self)
112 | if self.target_to_features_dict_ is None:
113 | return super().predict(X)
114 |
115 | X = check_array(X, accept_sparse=True)
116 | if X.shape[1] != self.expected_X_shape_:
117 | raise ValueError(
118 | f"Expected X shape is {self.expected_X_shape_}. Detected {X.shape[1]}"
119 | )
120 | y = [
121 | estimator.predict(X[:, self.target_to_features_dict_[i]])
122 | for i, estimator in enumerate(self.estimators_)
123 | ]
124 |
125 | return np.asarray(y).T
126 |
--------------------------------------------------------------------------------
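
When `target_to_features_dict` is left as `None`, `MultiFeatureMultiOutputRegressor.fit` falls back to the plain `MultiOutputRegressor` behaviour, so every target is trained on every feature. A minimal sketch of that default path (an editor's illustration):

```python
import numpy as np
from sklearn.linear_model import LinearRegression

from gtime.regressors import MultiFeatureMultiOutputRegressor

X = np.random.random((30, 5))
y = np.random.random((30, 3))

# With target_to_features_dict=None each of the three regressors is
# trained on all five features, as in sklearn's MultiOutputRegressor.
regressor = MultiFeatureMultiOutputRegressor(estimator=LinearRegression())
predictions = regressor.fit(X, y).predict(X)
assert predictions.shape == (30, 3)
```
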
/gtime/hierarchical/tests/test_naive.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pytest
4 | import sklearn
5 | from hypothesis import given
6 | import hypothesis.strategies as st
7 | from hypothesis.extra.numpy import arrays
8 | from pytest import fixture
9 |
10 | from gtime.hierarchical import HierarchicalNaive, HierarchicalBase
11 | from gtime.utils.fixtures import (
12 | time_series_forecasting_model1_no_cache,
13 | features1,
14 | model1,
15 | )
16 | from gtime.utils.hypothesis.time_indexes import giotto_time_series, period_indexes
17 |
18 |
19 | @fixture(scope="function")
20 | def hierarchical_naive_model(time_series_forecasting_model1_no_cache):
21 | return HierarchicalNaive(time_series_forecasting_model1_no_cache)
22 |
23 |
24 | @st.composite
25 | def n_time_series_with_same_index(
26 | draw, min_length: int = 5, min_n: int = 1, max_n: int = 5,
27 | ):
28 | n = draw(st.integers(min_value=min_n, max_value=max_n))
29 | index = draw(period_indexes(min_length=min_length))
30 | dictionary = {}
31 | for i in range(n):
32 | key = str(i)
33 | df_values = draw(
34 | arrays(
35 | dtype=np.float64,
36 | shape=index.shape[0],
37 | elements=st.floats(allow_nan=False, allow_infinity=False, width=32),
38 | )
39 | )
40 | value = pd.DataFrame(index=index, data=df_values)
41 | dictionary[key] = value
42 | return dictionary
43 |
44 |
45 | class TestHierarchicalBase:
46 | def test_class_abstract(self, model1):
47 | HierarchicalBase(model1, {})
48 |
49 |
50 | class TestHierarchicalNaive:
51 | def test_constructor(self, time_series_forecasting_model1_no_cache):
52 | HierarchicalNaive(model=time_series_forecasting_model1_no_cache)
53 |
54 | def test_constructor_no_hierarchy_tree(
55 | self, time_series_forecasting_model1_no_cache
56 | ):
57 | hierarchy_tree = {}
58 | with pytest.raises(TypeError):
59 | HierarchicalNaive(
60 | model=time_series_forecasting_model1_no_cache,
61 | hierarchy_tree=hierarchy_tree,
62 | )
63 |
64 | @given(time_series=giotto_time_series(min_length=5))
65 | def test_error_fit_dataframe(self, time_series, hierarchical_naive_model):
66 | with pytest.raises(ValueError):
67 | hierarchical_naive_model.fit(time_series)
68 |
69 | @given(time_series=giotto_time_series(min_length=5))
70 | def test_error_fit_key_not_string(self, time_series, hierarchical_naive_model):
71 | with pytest.raises(ValueError):
72 | hierarchical_naive_model.fit({1: time_series})
73 |
74 | def test_error_fit_value_not_dataframe(self, hierarchical_naive_model):
75 | with pytest.raises(ValueError):
76 | hierarchical_naive_model.fit({"wrong_field": 12})
77 |
78 | @given(dataframes=n_time_series_with_same_index())
79 | def test_fit_n_dataframes(self, dataframes, hierarchical_naive_model):
80 | hierarchical_naive_model.fit(dataframes)
81 |
82 | @given(dataframes=n_time_series_with_same_index())
83 | def test_fit_predict_n_dataframes_on_different_data(
84 | self, dataframes, hierarchical_naive_model
85 | ):
86 | hierarchical_naive_model.fit(dataframes).predict(dataframes)
87 |
88 | @given(dataframes=n_time_series_with_same_index())
89 | def test_fit_predict_n_dataframes(self, dataframes, hierarchical_naive_model):
90 | hierarchical_naive_model.fit(dataframes).predict()
91 |
92 | @given(dataframes=n_time_series_with_same_index())
93 | def test_fit_predict_on_subset_of_time_series(
94 | self, dataframes, hierarchical_naive_model
95 | ):
96 | key = np.random.choice(list(dataframes.keys()), 1)[0]
97 | hierarchical_naive_model.fit(dataframes)
98 | hierarchical_naive_model.predict({key: dataframes[key]})
99 |
100 | def test_error_predict_not_fitted(self, hierarchical_naive_model):
101 | with pytest.raises(sklearn.exceptions.NotFittedError):
102 | hierarchical_naive_model.predict()
103 |
104 | @given(dataframes=n_time_series_with_same_index())
105 | def test_error_with_bad_predict_key(self, dataframes, hierarchical_naive_model):
106 | correct_key = np.random.choice(list(dataframes.keys()), 1)[0]
107 | bad_key = "".join(dataframes.keys()) + "bad_key"
108 | hierarchical_naive_model.fit(dataframes)
109 | with pytest.raises(KeyError):
110 | hierarchical_naive_model.predict({bad_key: dataframes[correct_key]})
111 |
--------------------------------------------------------------------------------
/gtime/causality/linear_coefficient.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from sklearn.base import BaseEstimator, TransformerMixin
4 | from sklearn.linear_model import LinearRegression
5 |
6 | from gtime.causality.base import CausalityMixin
7 |
8 |
9 | class ShiftedLinearCoefficient(BaseEstimator, TransformerMixin, CausalityMixin):
10 | """Test the shifted linear fit coefficients between two or more time series.
11 |
12 | Parameters
13 | ----------
14 | min_shift : int, optional, default: ``1``
15 | The minimum number of shifts to check for.
16 |
17 | max_shift : int, optional, default: ``10``
18 | The maximum number of shifts to check for.
19 |
20 | target_col : str, optional, default: ``None``
21 | The column to use as the reference (i.e., the column which is not
22 | shifted).
23 |
24 | dropna : bool, optional, default: ``False``
25 | Determines if the NaN values created by shifting are retained or dropped.
26 |
27 | bootstrap_iterations : int, optional, default: ``None``
28 | If not None, compute the p_values of the test by performing bootstrapping of
29 | the original data (sampling with replacement).
30 |
31 | permutation_iterations : int, optional, default: ``None``
32 | If not None, compute the p_values of the test by performing permutations of
33 | the original data.
34 |
35 | Examples
36 | --------
37 |
38 | >>> from gtime.causality.linear_coefficient import ShiftedLinearCoefficient
39 | >>> import pandas.util.testing as testing
40 | >>> data = testing.makeTimeDataFrame(freq="s")
41 | >>> slc = ShiftedLinearCoefficient(target_col="A")
42 | >>> slc.fit(data)
43 | >>> slc.best_shifts_
44 | y A B C D
45 | x
46 | A 3 6 8 5
47 | B 9 9 4 1
48 | C 8 2 4 9
49 | D 3 9 4 3
50 | >>> slc.max_corrs_
51 | y A B C D
52 | x
53 | A 0.460236 0.420005 0.339370 0.267143
54 | B 0.177856 0.300350 0.367150 0.550490
55 | C 0.484860 0.263036 0.456046 0.251342
56 | D 0.580068 0.344688 0.253626 0.256220
57 | """
58 |
59 | def __init__(
60 | self,
61 | min_shift: int = 1,
62 | max_shift: int = 10,
63 | target_col: str = None,
64 | dropna: bool = False,
65 | bootstrap_iterations: int = None,
66 | permutation_iterations: int = None,
67 | ):
68 | super().__init__(
69 | bootstrap_iterations=bootstrap_iterations,
70 | permutation_iterations=permutation_iterations,
71 | )
72 | self.min_shift = min_shift
73 | self.max_shift = max_shift
74 | self.target_col = target_col
75 | self.dropna = dropna
76 |
77 | def fit(self, data: pd.DataFrame) -> "ShiftedLinearCoefficient":
78 | """Create the DataFrame of shifts of each time series which maximize the shifted
79 | linear fit coefficients.
80 |
81 | Parameters
82 | ----------
83 | data : pd.DataFrame, shape (n_samples, n_time_series), required
84 | The DataFrame containing the time-series on which to compute the shifted
85 | linear fit coefficients.
86 |
87 | Returns
88 | -------
89 | self : ``ShiftedLinearCoefficient``
90 |
91 | """
92 | best_shifts = self._compute_best_shifts(data, self._get_max_coeff_shift)
93 | pivot_tables = self._create_pivot_tables(best_shifts)
94 |
95 | self.best_shifts_ = pivot_tables["best_shifts"]
96 | self.max_corrs_ = pivot_tables["max_corrs"]
97 |
98 | if self.bootstrap_iterations:
99 | self.bootstrap_p_values_ = pivot_tables["bootstrap_p_values"]
100 |
101 | if self.permutation_iterations:
102 | self.permutation_p_values_ = pivot_tables["permutation_p_values"]
103 |
104 | return self
105 |
106 | def _get_max_coeff_shift(self, data: pd.DataFrame, x, y):
107 | shifts = pd.DataFrame()
108 | shifts[x] = data[x]
109 | shifts[y] = data[y]
112 | for shift in range(self.min_shift, self.max_shift + 1):
114 | shifts[shift] = data[x].shift(shift)
115 |
116 | shifts = shifts.dropna()
117 |
118 | lf = LinearRegression().fit(
119 | shifts[range(self.min_shift, self.max_shift + 1)].values, shifts[y].values
120 | )
121 |
122 | return lf.coef_.max(), np.argmax(lf.coef_) + self.min_shift
124 |
--------------------------------------------------------------------------------
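
The `min_shift`/`max_shift` parameters above bound the searched lag window; a short sketch narrowing the default range (an editor's illustration, reusing the data setup of the class example):

```python
import pandas.util.testing as testing

from gtime.causality.linear_coefficient import ShiftedLinearCoefficient

data = testing.makeTimeDataFrame(freq="s")

# Restrict the searched shift window to 1..5 instead of the default 1..10
slc = ShiftedLinearCoefficient(target_col="A", min_shift=1, max_shift=5)
slc.fit(data)
best = slc.best_shifts_  # every entry now lies in the range 1..5
```
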
/gtime/feature_extraction/trend.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from scipy.optimize import minimize
6 | from sklearn.base import BaseEstimator, TransformerMixin
7 | from sklearn.metrics import mean_squared_error
8 | from sklearn.utils.validation import check_is_fitted
9 |
10 | from gtime.base import FeatureMixin, add_class_name
11 | from gtime.utils.trends import TRENDS
12 |
13 | __all__ = ["Detrender"]
14 |
15 |
16 | class Detrender(BaseEstimator, TransformerMixin, FeatureMixin):
17 | """Apply a de-trend transformation to a time series.
18 |
19 | The purpose of the class is to fit a model, defined through the `trend` parameter, in
20 | order to find a trend in the time series. The trend can then be removed by subtracting
21 | the predictions of the fitted model.
22 |
23 | Parameters
24 | ----------
25 | trend : ``'polynomial'`` | ``'exponential'``, required
26 | The kind of trend removal to apply.
27 |
28 | trend_x0 : np.array, required
29 | Initialisation parameters passed to the trend function. This is used to select
30 | a starting point in order to minimize the `loss` function.
31 |
32 | loss : Callable, optional, default: ``mean_squared_error``
33 | The loss function to minimize.
34 |
35 | method : string, optional, default: ``"BFGS"``
36 | Loss function optimisation method.
37 |
38 | Examples
39 | --------
40 | >>> import pandas as pd
41 | >>> import numpy as np
42 | >>> from gtime.feature_extraction import Detrender
43 | >>> detrender = Detrender(trend='polynomial', trend_x0=np.zeros(2))
44 | >>> time_index = pd.date_range("2020-01-01", "2020-01-10")
45 | >>> X = pd.DataFrame(range(0, 10), index=time_index)
46 | >>> detrender.fit_transform(X)
47 | 0__Detrender
48 | 2020-01-01 9.180937e-07
49 | 2020-01-02 8.020709e-07
50 | 2020-01-03 6.860481e-07
51 | 2020-01-04 5.700253e-07
52 | 2020-01-05 4.540024e-07
53 | 2020-01-06 3.379796e-07
54 | 2020-01-07 2.219568e-07
55 | 2020-01-08 1.059340e-07
56 | 2020-01-09 -1.008878e-08
57 | 2020-01-10 -1.261116e-07
58 |
59 | """
60 |
61 | def __init__(
62 | self,
63 | trend: str,
64 | trend_x0: np.array,
65 | loss: Callable = mean_squared_error,
66 | method: str = "BFGS",
67 | ):
68 | self.trend = trend
69 | self.trend_x0 = trend_x0
70 | self.loss = loss
71 | self.method = method
72 |
73 | def fit(self, X: pd.DataFrame, y=None) -> "Detrender":
74 | """Fit the estimator.
75 |
76 | Parameters
77 | ----------
78 | X : pd.DataFrame, shape (n_samples, n_features)
79 | Input data.
80 |
81 | y : None
82 | There is no need of a target in a transformer, yet the pipeline API
83 | requires this parameter.
84 |
85 | Returns
86 | -------
87 | self : object
88 | Returns self.
89 |
90 | """
91 |
92 | # TODO: create validation function
93 | if self.trend not in TRENDS:
94 | raise ValueError(
95 | "The trend '%s' is not supported. Supported "
96 | "trends are %s." % (self.trend, list(sorted(TRENDS)))
97 | )
98 |
99 | self.best_trend_params_ = minimize(
100 | lambda opt: self.loss(
101 | X.values, [TRENDS[self.trend](t, opt) for t in range(0, X.shape[0])]
102 | ),
103 | self.trend_x0,
104 | method=self.method,
105 | options={"disp": False},
106 | )["x"]
107 |
108 | self.t0_ = X.index[0]
109 | freq = X.index.freq
110 | if freq is not None:
111 | self.period_ = freq
112 | else:
113 | self.period_ = X.index[1] - X.index[0]
114 |
115 | return self
116 |
117 | @add_class_name
118 | def transform(self, time_series: pd.DataFrame) -> pd.DataFrame:
119 | """Transform the ``time_series`` by removing the trend.
120 |
121 | Parameters
122 | ----------
123 | time_series: pd.DataFrame, shape (n_samples, 1), required
124 | The time series to transform.
125 |
126 | Returns
127 | -------
128 | time_series_t : pd.DataFrame, shape (n_samples, n_features)
129 | The transformed time series, without the trend.
130 |
131 | """
132 | check_is_fitted(self)
133 |
134 | time_steps = (time_series.index - self.t0_) / self.period_
135 |
136 | predictions = pd.Series(
137 | index=time_series.index,
138 | data=np.array(
139 | [TRENDS[self.trend](t, self.best_trend_params_) for t in time_steps]
140 | ).flatten(),
141 | )
142 |
143 | return time_series.sub(predictions, axis=0)
144 |
--------------------------------------------------------------------------------
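
Because `Detrender.fit` stores `t0_` and `period_`, `transform` can also detrend a window that starts after the training data; a sketch with synthetic linear data (an editor's illustration):

```python
import numpy as np
import pandas as pd

from gtime.feature_extraction import Detrender

X_train = pd.DataFrame(
    np.arange(10.0), index=pd.date_range("2020-01-01", "2020-01-10")
)
detrender = Detrender(trend="polynomial", trend_x0=np.zeros(2))
detrender.fit(X_train)

# Later dates are mapped onto the same time scale via the fitted t0_ and
# period_, so the linear trend extrapolates before being subtracted.
X_future = pd.DataFrame(
    np.arange(10.0, 15.0), index=pd.date_range("2020-01-11", "2020-01-15")
)
residual = detrender.transform(X_future)  # values should be close to zero
```
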
/gtime/plotting/tests/test_preprocessing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import re
4 | import pytest
5 | import hypothesis.strategies as st
6 | from hypothesis import given, settings, example
7 | from gtime.utils.hypothesis.time_indexes import giotto_time_series, period_indexes
8 | from gtime.plotting.preprocessing import (
9 | seasonal_split,
10 | acf,
11 | pacf,
12 | _get_cycle_names,
13 | _get_season_names,
14 | _autocorrelation,
15 | _normalize,
16 | _solve_yw_equation,
17 | _week_of_year,
18 | yule_walker,
19 | )
20 |
21 |
22 | class TestSplits:
23 | @given(t=period_indexes(min_length=1, max_length=1))
24 | @example(t=pd.PeriodIndex(["1974-12-31"], freq="W"))
25 | @example(t=pd.PeriodIndex(["1972-01-01"], freq="W"))
26 | @settings(deadline=None)
27 | def test_week_of_year(self, t):
28 | period = t[0]
29 | week = _week_of_year(period)
30 | assert re.match(r"\d{4}_\d\d?$", week)
31 |
32 | @given(
33 | df=giotto_time_series(min_length=3, max_length=500),
34 | cycle=st.one_of(
35 | st.sampled_from(["year", "quarter", "month", "week"]),
36 | st.from_regex(r"[1-9][DWMQY]", fullmatch=True),
37 | ),
38 | )
39 | @settings(deadline=None)
40 | def test__get_cycle_names_size(self, df, cycle):
41 | cycle = _get_cycle_names(df, cycle)
42 | assert len(cycle) == len(df)
43 |
44 | @given(
45 | df=giotto_time_series(min_length=3, max_length=500),
46 | cycle=st.one_of(
47 | st.sampled_from(["year", "quarter", "month", "week"]),
48 | st.from_regex(r"[1-9][DWMQY]", fullmatch=True),
49 | ),
50 | freq=st.from_regex(r"[1-9]?[DWMQ]", fullmatch=True),
51 | )
52 | @settings(deadline=None)
53 | def test__get_season_names_size(self, df, cycle, freq):
54 | seasons = _get_season_names(df, cycle, freq)
55 | assert len(seasons) == len(df)
56 |
57 | @given(
58 | df=giotto_time_series(min_length=3, max_length=500),
59 | cycle=st.one_of(
60 | st.sampled_from(["year", "quarter", "month", "week"]),
61 | st.from_regex(r"[1-9][DWMQY]", fullmatch=True),
62 | ),
63 | freq=st.one_of(st.from_regex(r"[1-9]?[DWMQ]", fullmatch=True), st.none()),
64 | agg=st.sampled_from(["mean", "sum", "last"]),
65 | )
66 | @settings(deadline=None)
67 | def test_seasonal_split_shape_named(self, df, cycle, freq, agg):
68 | split = seasonal_split(df, cycle=cycle, freq=freq, agg=agg)
69 | if freq is None:
70 | freq = df.index.freqstr
71 | assert split.stack().shape == df.resample(freq).agg(agg).dropna().shape
72 |
73 |
74 | class TestAcf:
75 | @given(x=st.lists(st.floats(allow_nan=False), min_size=1))
76 | def test_autocorrelation(self, x):
77 | autocorr = _autocorrelation(np.array(x))
78 | expected = np.correlate(x, x, mode="full")[-len(x) :] / len(x)
79 | np.testing.assert_array_equal(autocorr, expected)
80 |
81 | @given(
82 | x=st.lists(
83 | st.floats(
84 | allow_nan=False, allow_infinity=False, max_value=1e20, min_value=-1e20
85 | ),
86 | min_size=1,
87 | )
88 | )
89 | def test_scale(self, x):
90 | scaled_x = _normalize(np.array(x))
91 | assert scaled_x.mean() == pytest.approx(0.0)
92 | assert scaled_x.std() == pytest.approx(1.0) or scaled_x.std() == pytest.approx(
93 | 0.0
94 | )
95 |
96 | @given(x=st.lists(st.floats(allow_nan=False, allow_infinity=False), min_size=2))
97 | def test_solve_yw(self, x):
98 | rho = _solve_yw_equation(np.array(x))
99 | if not np.isnan(np.sum(rho)):
100 | assert len(rho) == len(x) - 1
101 |
102 | @given(
103 | x=st.lists(st.floats(allow_nan=False, allow_infinity=False), min_size=2),
104 | order=st.integers(min_value=1),
105 | )
106 | def test_yule_walker_abs(self, x, order):
107 | pacf = yule_walker(np.array(x), order)
108 | if not (np.isnan(np.sum(pacf)) or len(pacf) == 0):
109 | assert all(abs(pacf) <= 2)
110 |
111 | @given(
112 | df=giotto_time_series(min_length=1, allow_nan=False, allow_infinity=False),
113 | max_lag=st.one_of(st.integers(min_value=1, max_value=100), st.none()),
114 | )
115 | def test_acf_len(self, df, max_lag):
116 | df_array = np.ravel(df.values)
117 | res = acf(df_array, max_lag)
118 | if max_lag is None:
119 | max_lag = len(df)
120 | assert len(res) == min(max_lag, len(df))
121 |
122 | @given(
123 | df=giotto_time_series(
124 | min_length=1, allow_nan=False, allow_infinity=False, max_length=50
125 | ),
126 | max_lag=st.one_of(st.integers(min_value=1, max_value=100), st.none()),
127 | )
128 | def test_pacf_len(self, df, max_lag):
129 | df_array = np.ravel(df.values)
130 | res = pacf(df_array, max_lag)
131 | if max_lag is None:
132 | max_lag = len(df)
133 | assert len(res) == min(max_lag, len(df))
134 |
--------------------------------------------------------------------------------
/gtime/forecasting/online.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from sklearn.base import BaseEstimator
4 | from sklearn.utils.validation import check_array, check_X_y, check_random_state
5 |
6 |
7 | def l1(a, b):
8 | return np.abs(np.subtract(a, b))
9 |
10 |
11 | class HedgeForecaster(BaseEstimator):
12 | """Regressor model using Hedge algorithm.
13 |
14 | This algorithm is based on a multiplicative weight update method to create a dynamic combination of regression
15 | models. In theory, there is no common training phase on the data; only the loss is needed to update the model.
16 |
17 | Parameters
18 | ----------
19 |
20 | learning_rate : float, (default=0.001)
21 | The factor to use for the weight update.
22 |
23 | loss : callable, optional (default=`gtime.forecasting.online.l1`)
24 | Loss function used to compute the loss matrix.
25 |
26 | random_state : int, RandomState instance or None, optional (default=None)
27 |     Controls the randomness of the expert selected at each time step:
28 |     experts are sampled with probability proportional to their current weights.
33 |
34 | Attributes
35 | ----------
36 | loss_matrix_ : array, (n_samples, n_experts)
37 | Loss matrix between X and y.
38 |
39 | total_loss_ : int or float,
40 | Sum of losses based on Hedge algorithm decisions.
41 |
42 | weights_ : array, (n_experts)
43 | Last weight of each expert.
44 |
45 | decisions_ : array, (n_samples)
46 | Indices of chosen expert depending on weights.
47 |
48 | Examples
49 | --------
50 | >>> import pandas as pd
51 | >>> import numpy as np
52 | >>> from gtime.forecasting.online import HedgeForecaster
53 | >>> time_index = pd.date_range("2020-01-01", "2020-01-20")
54 | >>> X = pd.DataFrame(np.random.randint(4, size=(20, 3)), index=time_index)
55 | >>> y = pd.DataFrame(np.random.randint(4, size=(20, 1)), index=time_index, columns=["y_1"])
56 | >>> hr = HedgeForecaster(random_state=42)
57 | >>> hr.fit_predict(X, y).head()
58 | 0
59 | 2020-01-01 2
60 | 2020-01-02 0
61 | 2020-01-03 3
62 | 2020-01-04 3
63 | 2020-01-05 2
64 | >>> print(f"Estimator weights: {hr.weights_}")
65 | Estimator weights: [0.97713925 0.97723619 0.97980439]
66 | >>> print(f"Decisions: {hr.decisions_}")
67 | Decisions: [1 2 2 1 0 0 0 2 1 2 0 2 2 0 0 0 0 1 1 0]
68 | >>> print(f"Total loss: {hr.total_loss_}")
69 | Total loss: 30
70 |
71 | """
72 |
73 | def __init__(
74 | self, learning_rate: float = 0.001, loss: callable = l1, random_state=None
75 | ):
76 | # store the constructor arguments unchanged (scikit-learn convention)
77 | self.learning_rate = learning_rate
78 | self.loss = loss
79 | self.random_state = random_state
80 |
81 | def hedge(self, timestamps, n_experts, loss, eps, random_state):
82 | weights = np.ones(n_experts)
83 | self.decisions_ = np.zeros(timestamps, dtype=int)
84 |
85 | total_loss = 0
86 | for t in range(timestamps):
87 | self.decisions_[t] = random_state.choice(
88 | n_experts, p=weights / np.sum(weights)
89 | )
90 | total_loss += loss[t][int(self.decisions_[t])]
91 | weights *= np.exp(-eps * loss[t])
92 | return total_loss, weights
93 |
94 | def fit(self, X, y):
95 | """ Fit the model to data, compute weights and decisions iteratively.
96 |
97 | Parameters
98 | ----------
99 | X : array-like, shape (n_samples, n_features)
100 |     Data, one column per expert.
101 | y : array-like, shape (n_samples, n_outputs). True values used to compute the loss matrix.
102 | Returns
103 | -------
104 | self : object
105 | """
106 |
107 | random_state = check_random_state(self.random_state)
108 |
109 | # FIXME: multi_output is not currently supported but mono-column dataframe is 2D (n, 1) so multi_output=True
110 | # makes it easier to handle
111 | X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
112 |
113 | self.loss_matrix_ = self.loss(X, y)
114 |
115 | timestamps = len(X)
116 | n_experts = X.shape[1]
117 |
118 | self.total_loss_, self.weights_ = self.hedge(
119 | timestamps=timestamps,
120 | n_experts=n_experts,
121 | loss=self.loss_matrix_,
122 | eps=self.learning_rate,
123 | random_state=random_state,
124 | )
125 |
126 | return self
127 |
128 | def fit_predict(self, X, y):
129 | """Fit and predict variable using Hedge algorithm.
130 |
131 | Parameters
132 | ----------
133 | X : (sparse) array-like, shape (n_samples, n_features)
134 | Data.
135 |
136 | y : (sparse) array-like, shape (n_samples, n_outputs)
137 | Predictions.
138 |
139 | Returns
140 | -------
141 | predictions : pd.DataFrame
142 | Predictions.
143 | """
144 | self.fit(X, y)
145 |
146 | predictions = pd.DataFrame(
147 | np.take_along_axis(check_array(X), self.decisions_.reshape(-1, 1), axis=1),
148 | index=X.index,
149 | )
150 |
151 | return predictions
152 |
--------------------------------------------------------------------------------
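
A standalone numpy sketch of the multiplicative weight update behind `HedgeForecaster` (illustrative numbers, not the library code): each round an expert is drawn with probability proportional to its weight, then every weight decays by `exp(-learning_rate * loss)`.

```python
import numpy as np

rng = np.random.default_rng(0)
learning_rate = 0.5
weights = np.ones(3)                      # three experts, uniform start
loss_matrix = np.array([[0.0, 1.0, 2.0],  # per-round loss of each expert
                        [2.0, 0.0, 1.0]])

for round_losses in loss_matrix:
    p = weights / weights.sum()             # sampling distribution over experts
    chosen = rng.choice(len(weights), p=p)  # the expert used this round
    weights = weights * np.exp(-learning_rate * round_losses)

print(weights / weights.sum())  # experts with lower cumulative loss dominate
```
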
/CONTRIBUTING.rst:
--------------------------------------------------------------------------------
1 | Contributing guidelines
2 | =======================
3 |
4 | Pull Request Checklist
5 | ----------------------
6 |
7 | Before sending your pull requests, make sure you have followed this checklist.
8 | - Read the `contributing guidelines `_.
9 | - Read the `code of conduct `_.
10 | - Ensure you have signed the `contributor license agreement (CLA) `_.
11 | - Check if the changes are consistent with the guidelines.
12 | - Changes are consistent with the Coding Style.
13 | - Run Unit Tests.
14 |
15 | How to become a contributor and submit your own code
16 | ----------------------------------------------------
17 |
18 | Contributor License Agreements
19 | ------------------------------
20 |
21 | In order to become a contributor of Giotto, the first step is to sign the
22 | `contributor license agreement (CLA) `_.
23 | **NOTE**: Only original source code from you and other people that have signed
24 | the CLA can be accepted into the main repository.
25 |
26 | Contributing code
27 | -----------------
28 |
29 | If you have improvements to Giotto, do not hesitate to send us pull requests!
30 | Please follow the GitHub how-to (https://help.github.com/articles/using-pull-requests/).
31 | The Giotto Team will review your pull requests. Once the pull requests are approved and pass continuous integration checks, the
32 | Giotto team will work on getting your pull request submitted to our GitHub
33 | repository. Eventually, your pull request will be merged automatically on GitHub.
34 | If you want to contribute, start working through the Giotto codebase,
35 | navigate to the `GitHub issue tab `_
36 | and start looking through interesting issues. These are issues that we believe
37 | are particularly well suited for outside contributions, often because we
38 | probably won't get to them right now. If you decide to start on an issue, leave
39 | a comment so that other people know that you're working on it. If you want to
40 | help out but would rather not work alone, use the issue comment thread to coordinate.
41 |
42 | Contribution guidelines and standards
43 | -------------------------------------
44 |
45 | Before sending your pull request for review, make sure your changes are
46 | consistent with the guidelines and follow the coding style below.
47 |
48 | General guidelines and philosophy for contribution
49 | --------------------------------------------------
50 |
51 | * Include unit tests when you contribute new features, as they help to
52 | a) prove that your code works correctly, and
53 | b) guard against future breaking changes to lower the maintenance cost.
54 | * Bug fixes also generally require unit tests, because the presence of bugs
55 | usually indicates insufficient test coverage.
56 | * Keep API compatibility in mind when you change code in core Giotto.
57 | * Clearly define your exceptions using the utils functions and test the exceptions.
58 | * When you contribute a new feature to Giotto, the maintenance burden is
59 | (by default) transferred to the Giotto team. This means that the benefit
60 | of the contribution must be compared against the cost of maintaining the
61 | feature.
62 |
63 | C++ coding style
64 | ----------------
65 |
66 | Changes to Giotto C/C++ code should conform to `Google C++ Style Guide `_.
67 | Use `clang-tidy` to check your C/C++ changes. To install `clang-tidy` on
68 | ubuntu:16.04, do:
69 |
70 |
71 | .. code-block:: bash
72 |
73 | apt-get install -y clang-tidy
74 |
75 | You can check a C/C++ file by doing:
76 |
77 | .. code-block:: bash
78 |
79 | clang-format --style=google my_cc_file.cc > /tmp/my_cc_file.cc && diff my_cc_file.cc /tmp/my_cc_file.cc
80 |
81 | Python coding style
82 | -------------------
83 |
84 | Changes to Giotto Python code should conform to PEP8 directives.
85 | Use `flake8` to check your Python changes. To install `flake8` just do
86 |
87 | .. code-block:: bash
88 |
89 | pip install flake8
90 |
91 | You can use `flake8` on your python code via the following instructions:
92 |
93 | .. code-block:: bash
94 |
95 | flake8 name_of_your_script.py
96 |
97 | Git pre-commit hook
98 | -------------------
99 | We provide a pre-commit git hook that prevents accidental commits to the master branch and automatically formats
100 | the code using `black`. To activate it, install the `pre-commit` library.
101 |
102 | Development requirements
103 | ------------------------
104 | In order to contribute to giotto-time, some additional Python packages are required beyond the standard
105 | requirements. To install them, do
106 |
107 | .. code-block:: python
108 |
109 | pip install -r dev-requirements.txt
110 |
111 | Running unit tests
112 | ------------------
113 |
114 | There are two ways to run Giotto unit tests.
115 |
1. Using tools and libraries installed directly on your system. The
   recommended tool is `pytest`. To install `pytest`, run:

   .. code-block:: bash

       pip install pytest

   You can then run `pytest` on a test file as follows (see below for running
   the whole suite):

   .. code-block:: bash

       pytest name_of_your_script.py
127 |
128 | 2. Using Azure (azure-pipelines.yml) and Giotto's CI scripts.
129 |
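To run the whole test suite locally from the repository root (assuming
`pytest` and the development requirements are installed), the following
should suffice:

.. code-block:: bash

    pytest gtime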
--------------------------------------------------------------------------------
/gtime/hierarchical/tests/test_bottom_up.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pytest
4 | import sklearn
5 | from hypothesis import given
6 | import networkx as nx
8 | import hypothesis.strategies as st
9 | from hypothesis.extra.numpy import arrays
10 | from pytest import fixture
11 |
12 | from gtime.hierarchical import HierarchicalBottomUp
13 | from gtime.utils.fixtures import (
14 | time_series_forecasting_model1_no_cache,
15 | features1,
16 | model1,
17 | )
18 | from gtime.utils.hypothesis.time_indexes import giotto_time_series, period_indexes
19 |
20 |
21 | @st.composite
22 | def n_time_series_with_same_index(
23 | draw, min_length: int = 5, min_n: int = 1, max_n: int = 5,
24 | ):
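    """Draw a dict mapping string keys to single-column DataFrames that all
    share the same period index."""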
25 | n = draw(st.integers(min_value=min_n, max_value=max_n))
26 | index = draw(period_indexes(min_length=min_length))
27 | dictionary = {}
28 | for i in range(n):
29 | key = str(i)
30 | df_values = draw(
31 | arrays(
32 | dtype=np.float64,
33 | shape=index.shape[0],
34 | elements=st.floats(allow_nan=False, allow_infinity=False, width=32),
35 | )
36 | )
37 | value = pd.DataFrame(index=index, data=df_values)
38 | dictionary[key] = value
39 | return dictionary
40 |
41 |
@st.composite
def tree_construction(draw, dictionary):
    # Build a random directed tree over the dictionary keys. Using ``draw``
    # (rather than the ``random`` module) keeps the generated trees
    # reproducible and shrinkable under hypothesis.
    tree_nodes = list(dictionary.keys())
    tree = nx.DiGraph()
    n = len(tree_nodes)
    for _ in range(n):
        selected_key = draw(st.sampled_from(tree_nodes))
        if len(tree) > 0:
            selected_node = draw(st.sampled_from(list(tree.nodes)))
            tree.add_edge(selected_node, selected_key)
        tree.add_node(selected_key)
        tree_nodes.remove(selected_key)
    return tree
55 |
56 |
57 | @st.composite
58 | def hierarchical_bottom_up_model(draw, time_series_forecasting_model1_no_cache):
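    """Build a ``HierarchicalBottomUp`` model over a random hierarchy tree
    drawn for a freshly generated set of time series."""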
59 | dataframes = draw(n_time_series_with_same_index(min_n=5))
60 | tree = draw(tree_construction(dataframes))
61 | return HierarchicalBottomUp(time_series_forecasting_model1_no_cache, tree)
62 |
63 |
64 | @fixture(scope="function")
65 | def hierarchical_basic_bottom_up_model(time_series_forecasting_model1_no_cache):
66 | return HierarchicalBottomUp(time_series_forecasting_model1_no_cache, "infer")
67 |
68 |
69 | class TestHierarchicalBottomUp:
70 | def test_basic_constructor(self, time_series_forecasting_model1_no_cache):
71 | HierarchicalBottomUp(
72 | model=time_series_forecasting_model1_no_cache, hierarchy_tree="infer"
73 | )
74 |
75 | @given(dataframes=n_time_series_with_same_index(min_n=5))
76 | def test_fit_predict_basic_bottom_up_on_different_data(
77 | self, dataframes, hierarchical_basic_bottom_up_model
78 | ):
79 | hierarchical_basic_bottom_up_model.fit(dataframes).predict(dataframes)
80 |
81 | @given(dataframes=n_time_series_with_same_index(min_n=5))
82 | def test_fit_predict_basic_bottom_up(
83 | self, dataframes, hierarchical_basic_bottom_up_model
84 | ):
85 | hierarchical_basic_bottom_up_model.fit(dataframes).predict()
86 |
    @given(data=st.data(), dataframes=n_time_series_with_same_index())
    def test_constructor(
        self, data, time_series_forecasting_model1_no_cache, dataframes
    ):
        # tree_construction is a strategy: draw a concrete tree from it
        # instead of passing the strategy object to the constructor.
        tree = data.draw(tree_construction(dataframes))
        HierarchicalBottomUp(time_series_forecasting_model1_no_cache, tree)
91 |
92 | @given(data=st.data(), dataframes=n_time_series_with_same_index(min_n=5))
93 | def test_fit_predict_bottom_up(
94 | self, data, dataframes, time_series_forecasting_model1_no_cache
95 | ):
96 | model = data.draw(
97 | hierarchical_bottom_up_model(time_series_forecasting_model1_no_cache)
98 | )
99 | prediction = model.fit(dataframes).predict()
        for key in dataframes.keys():
            # every fitted series must appear in the prediction output
            assert key in prediction.keys()
103 |
104 | @given(dataframes=n_time_series_with_same_index(min_n=5))
105 | def test_fit_predict_on_subset_of_time_series(
106 | self, dataframes, hierarchical_basic_bottom_up_model
107 | ):
108 | key = np.random.choice(list(dataframes.keys()), 1)[0]
109 | hierarchical_basic_bottom_up_model.fit(dataframes)
110 | hierarchical_basic_bottom_up_model.predict({key: dataframes[key]})
111 |
112 | def test_error_predict_not_fitted(self, hierarchical_basic_bottom_up_model):
113 | with pytest.raises(sklearn.exceptions.NotFittedError):
114 | hierarchical_basic_bottom_up_model.predict()
115 |
116 | @given(dataframes=n_time_series_with_same_index())
117 | def test_error_with_bad_predict_key(
118 | self, dataframes, hierarchical_basic_bottom_up_model
119 | ):
120 | correct_key = np.random.choice(list(dataframes.keys()), 1)[0]
121 | bad_key = "".join(dataframes.keys()) + "bad_key"
122 | hierarchical_basic_bottom_up_model.fit(dataframes)
123 | with pytest.raises(KeyError):
124 | hierarchical_basic_bottom_up_model.predict(
125 | {bad_key: dataframes[correct_key]}
126 | )
127 |
128 | @given(time_series=giotto_time_series(min_length=5))
129 | def test_error_fit_dataframe(self, time_series, hierarchical_basic_bottom_up_model):
130 | with pytest.raises(ValueError):
131 | hierarchical_basic_bottom_up_model.fit(time_series)
132 |
133 | @given(time_series=giotto_time_series(min_length=5))
134 | def test_error_fit_key_not_string(
135 | self, time_series, hierarchical_basic_bottom_up_model
136 | ):
137 | with pytest.raises(ValueError):
138 | hierarchical_basic_bottom_up_model.fit({1: time_series})
139 |
140 | def test_error_fit_value_not_dataframe(self, hierarchical_basic_bottom_up_model):
141 | with pytest.raises(ValueError):
142 | hierarchical_basic_bottom_up_model.fit({"wrong_field": 12})
143 |
--------------------------------------------------------------------------------
/gtime/utils/hypothesis/feature_matrices.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import hypothesis.strategies as st
4 | import pandas as pd
5 | from hypothesis.extra.numpy import arrays
6 | from hypothesis.strategies import tuples, integers, floats
7 |
8 | from gtime.utils.hypothesis.general_strategies import shape_X_y_matrices, shape_matrix
9 | from .time_indexes import giotto_time_series
10 | from ...compose import FeatureCreation
11 | from ...model_selection import horizon_shift
12 |
13 |
14 | @st.composite
15 | def X_y_matrices(
16 | draw,
17 | horizon: int,
18 | df_transformer: FeatureCreation,
19 | min_length: Optional[int] = None,
20 | allow_nan_infinity: bool = True,
21 | ):
22 | """ Returns a strategy that generates X and y feature matrices.
23 |
24 | Parameters
25 | ----------
26 | horizon : ``int``, required
27 | The number of steps to forecast in the future. It affects the y shape.
28 |
    df_transformer : ``FeatureCreation``, required
        The ``FeatureCreation`` transformer used to build the X matrix from the
        generated time series.
31 |
32 | min_length : ``int``, optional, (default=``None``)
33 | Minimum length of the matrices
34 |
35 | allow_nan_infinity : ``bool``, optional, (default=``True``)
36 | Allow nan and infinity in the starting time series
37 |
38 | Returns
39 | -------
40 | X : pd.DataFrame
41 | X feature matrix
42 |
43 | y : pd.DataFrame
44 | y feature matrix
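
    Notes
    -----
    A sketch of intended usage (assuming ``FeatureCreation`` accepts a
    ColumnTransformer-style list of ``(name, transformer, columns)`` triples)::

        from sklearn.compose import make_column_selector
        from gtime.compose import FeatureCreation
        from gtime.feature_extraction import Shift

        dft = FeatureCreation([("s1", Shift(1), make_column_selector())])
        X, y = X_y_matrices(horizon=3, df_transformer=dft, min_length=10).example()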
45 | """
46 | min_length = min_length if min_length is not None else 1
47 | period_index_series = draw(
48 | giotto_time_series(
49 | min_length=min_length,
50 | allow_nan=allow_nan_infinity,
51 | allow_infinity=allow_nan_infinity,
52 | )
53 | )
54 | X = df_transformer.fit_transform(period_index_series)
55 | y = horizon_shift(period_index_series, horizon=horizon)
56 |
57 | return X, y
58 |
59 |
60 | @st.composite
61 | def X_matrices(
62 | draw,
63 | df_transformer: FeatureCreation,
64 | min_length: Optional[int] = None,
65 | allow_nan_infinity: bool = True,
66 | ):
67 | """ Returns a strategy that generates the X feature matrix.
68 |
69 | Parameters
70 | ----------
    df_transformer : ``FeatureCreation``, required
        the ``FeatureCreation`` transformer used to build the X matrix from
        the generated time series
74 |
75 | min_length : ``int``, optional, (default=``None``)
76 | minimum length of the matrices
77 |
78 | allow_nan_infinity : ``bool``, optional, (default=``True``)
79 | allow nan and infinity in the starting time series
80 |
81 | Returns
82 | -------
83 | X : ``pd.DataFrame``
84 | X feature matrix
85 | """
86 | min_length = min_length if min_length is not None else 1
87 | period_index_series = draw(
88 | giotto_time_series(
89 | min_length=min_length,
90 | allow_nan=allow_nan_infinity,
91 | allow_infinity=allow_nan_infinity,
92 | )
93 | )
94 |
95 | X = df_transformer.fit_transform(period_index_series)
96 | return X
97 |
98 |
99 | @st.composite
100 | def y_matrices(
101 | draw,
102 | horizon: int = 3,
103 | min_length: Optional[int] = None,
104 | allow_nan_infinity: bool = True,
105 | ):
106 | """ Returns a strategy that generates the y feature matrix.
107 |
108 | Parameters
109 | ----------
110 | horizon : ``int``, optional, (default=3)
111 | the number of steps to forecast in the future. It affects the y shape.
112 |
113 | min_length : ``int``, optional, (default=``None``)
114 | minimum length of the matrices
115 |
116 | allow_nan_infinity : ``bool``, optional, (default=``True``)
117 | allow nan and infinity in the starting time series
118 |
119 | Returns
120 | -------
121 | y : ``pd.DataFrame``
122 | y feature matrix
123 | """
124 | min_length = min_length if min_length is not None else 1
125 | period_index_series = draw(
126 | giotto_time_series(
127 | min_length=min_length,
128 | allow_nan=allow_nan_infinity,
129 | allow_infinity=allow_nan_infinity,
130 | )
131 | )
132 |
133 | y = horizon_shift(period_index_series, horizon=horizon)
134 |
135 | return y
136 |
137 |
138 | @st.composite
139 | def numpy_X_y_matrices(
140 | draw,
141 | X_y_shapes=shape_X_y_matrices(),
    min_value: Optional[float] = None,
    max_value: Optional[float] = None,
144 | allow_nan: bool = False,
145 | allow_infinity: bool = False,
146 | ):
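    """Draw a pair of float numpy matrices ``(X, y)`` with the same number of
    rows and with X having more rows than columns."""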
    if isinstance(X_y_shapes, (tuple, list)):
148 | X_shape, y_shape = X_y_shapes
149 | else:
150 | X_shape, y_shape = draw(X_y_shapes)
151 | if X_shape[0] != y_shape[0]:
152 | raise ValueError(f"X.shape[0] must be == y.shape[0]: {X_shape}, {y_shape}")
    if X_shape[0] <= X_shape[1]:
        raise ValueError(f"X.shape[0] must be > X.shape[1]: {X_shape}")
155 |
156 | elements = floats(
157 | min_value=min_value,
158 | max_value=max_value,
159 | allow_nan=allow_nan,
160 | allow_infinity=allow_infinity,
161 | )
162 | X = draw(arrays(dtype=float, shape=X_shape, elements=elements,))
163 | y = draw(arrays(dtype=float, shape=y_shape, elements=elements,))
164 | return X, y
165 |
166 |
167 | @st.composite
168 | def numpy_X_matrices(
169 | draw,
170 | shape=shape_matrix(),
    min_value: Optional[float] = None,
    max_value: Optional[float] = None,
173 | allow_nan: bool = False,
174 | allow_infinity: bool = False,
175 | ):
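    """Draw a float numpy matrix ``X`` with more rows than columns."""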
    if not isinstance(shape, (tuple, list)):
177 | shape = draw(shape)
    if shape[0] <= shape[1]:
        raise ValueError(f"X.shape[0] must be > X.shape[1]: {shape}")
180 |
181 | elements = floats(
182 | min_value=min_value,
183 | max_value=max_value,
184 | allow_nan=allow_nan,
185 | allow_infinity=allow_infinity,
186 | )
187 |
188 | X = draw(arrays(dtype=float, shape=shape, elements=elements,))
189 | return X
190 |
--------------------------------------------------------------------------------
/gtime/causality/base.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from itertools import product
3 |
4 | import numpy as np
5 | import pandas as pd
6 | from scipy import stats
7 | from sklearn.utils.validation import check_is_fitted
8 |
9 |
10 | class CausalityMixin:
11 | """ Base class for causality tests. """
12 |
13 | def __init__(self, bootstrap_iterations, permutation_iterations):
14 | self.bootstrap_iterations = bootstrap_iterations
15 | self.permutation_iterations = permutation_iterations
16 |
17 | def transform(self, data: pd.DataFrame) -> pd.DataFrame:
18 | """Shifts each input time series by the amount which optimizes correlation with
19 | the selected 'target_col' column. If no target column is specified, the first
20 | column of the DataFrame is taken as the target.
21 |
22 | Parameters
23 | ----------
24 | data : pd.DataFrame, shape (n_samples, n_time_series), required
25 | The DataFrame containing the time series on which to perform the
26 | transformation.
27 |
        Returns
        -------
        data_t : pd.DataFrame, shape (n_samples, n_time_series)
            The input DataFrame with each non-target column shifted by the lag
            that maximizes its correlation with the target column.
33 |
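        Notes
        -----
        A sketch of typical usage through a concrete subclass (the class name
        below is assumed from ``gtime.causality``; adapt it to the causality
        test you actually use)::

            from gtime.causality import ShiftedPearsonCorrelation

            spc = ShiftedPearsonCorrelation(target_col="y", max_shift=10)
            shifted = spc.fit(data).transform(data)
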
34 | """
35 | check_is_fitted(self)
36 | data_t = data.copy()
37 |
38 | if self.target_col is None:
39 | self.target_col = data_t.columns[0]
40 | warnings.warn(
41 | "The target column was not specified. Therefore, the first "
42 | f"column {self.target_col } of the DataFrame was taken as "
43 | "target column. If you want to transform with respect to "
44 | "another column, please use it as a target column."
45 | )
46 |
47 | for col in data_t:
48 | if col != self.target_col:
49 | data_t[col] = data_t[col].shift(self.best_shifts_[self.target_col][col])
50 | if self.dropna:
51 | data_t = data_t.dropna()
52 |
53 | return data_t
54 |
55 | def _initialize_table(self):
56 | best_shifts = pd.DataFrame(columns=["x", "y", "shift", "max_corr"])
        # "x" and "y" hold column names; "max_corr" is a correlation in [-1, 1]
        column_types = {
            "x": str,
            "y": str,
            "shift": np.int64,
            "max_corr": np.float64,
        }
63 |
        if self.bootstrap_iterations:
            best_shifts = best_shifts.reindex(
                best_shifts.columns.tolist() + ["bootstrap_p_values"], axis=1
            )
            column_types["bootstrap_p_values"] = np.float64

        if self.permutation_iterations:
            best_shifts = best_shifts.reindex(
                best_shifts.columns.tolist() + ["permutation_p_values"], axis=1
            )
            column_types["permutation_p_values"] = np.float64
69 |
70 | best_shifts = best_shifts.astype(column_types)
71 | return best_shifts
72 |
73 | def _compute_best_shifts(self, data, shift_func):
74 | best_shifts = self._initialize_table()
75 |
76 | if self.target_col is None:
77 | columns_to_shift = [(x, y) for x, y in product(data.columns, repeat=2)]
78 |
79 | else:
80 | columns_to_shift = [(col, self.target_col) for col in data.columns]
81 |
82 | for (x, y) in columns_to_shift:
83 | res = shift_func(data, x=x, y=y)
84 | best_shift = res[1]
85 | max_corr = res[0]
86 | tables = {
87 | "x": x,
88 | "y": y,
89 | "shift": best_shift,
90 | "max_corr": max_corr,
91 | }
92 | if self.bootstrap_iterations:
93 | bootstrap_p_value = self._compute_p_values(
94 | data=data, x=x, y=y, shift=best_shift, test_type="bootstrap"
95 | )
96 | tables["bootstrap_p_values"] = bootstrap_p_value
97 |
98 | if self.permutation_iterations:
99 | bootstrap_p_value = self._compute_p_values(
100 | data=data, x=x, y=y, shift=best_shift, test_type="permutation"
101 | )
102 | tables["permutation_p_values"] = bootstrap_p_value
103 |
104 | best_shifts = best_shifts.append(tables, ignore_index=True,)
105 |
106 | return best_shifts
107 |
108 | def _compute_p_values(self, data, x, y, shift, test_type):
109 | data_t = data.copy()
110 | data_t[x] = data_t.shift(shift)[x]
111 | data_t.dropna(axis=0, inplace=True)
112 | rhos = []
113 | n_iterations = (
114 | self.permutation_iterations
115 | if test_type == "permutation"
116 | else self.bootstrap_iterations
117 | )
118 |
        for _ in range(n_iterations):
            if test_type == "permutation":
                # Permute x only: shuffling whole rows would leave each
                # (x, y) pair intact and the correlation unchanged.
                samples = data_t.copy()
                samples[x] = np.random.permutation(samples[x].values)
            else:
                # Bootstrap: resample (x, y) pairs with replacement.
                samples = data_t.sample(n=len(data_t), replace=True)

            rhos.append(stats.pearsonr(samples[x], samples[y])[0])

        # Fraction of resampled correlations below zero; the two-sided p-value
        # doubles the smaller tail.
        percentile = stats.percentileofscore(rhos, 0) / 100
        p_value = 2 * percentile if percentile < 0.5 else 2 * (1 - percentile)

        return p_value
132 |
133 | def _create_pivot_tables(self, best_shifts):
134 | pivot_best_shifts = pd.pivot_table(
135 | best_shifts, index=["x"], columns=["y"], values="shift"
136 | )
137 | max_corrs = pd.pivot_table(
138 | best_shifts, index=["x"], columns=["y"], values="max_corr"
139 | )
140 |
141 | pivot_tables = {"best_shifts": pivot_best_shifts, "max_corrs": max_corrs}
142 |
143 | if self.bootstrap_iterations:
144 | bootstrap_p_values = pd.pivot_table(
145 | best_shifts, index=["x"], columns=["y"], values="bootstrap_p_values"
146 | )
147 | pivot_tables["bootstrap_p_values"] = bootstrap_p_values
148 |
149 | if self.permutation_iterations:
150 | permutation_p_values = pd.pivot_table(
151 | best_shifts, index=["x"], columns=["y"], values="permutation_p_values"
152 | )
153 | pivot_tables["permutation_p_values"] = permutation_p_values
154 |
155 | return pivot_tables
156 |
--------------------------------------------------------------------------------
/gtime/preprocessing/tests/utils.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from typing import List, Union, Optional, Tuple
3 |
4 | import numpy as np
5 | import pandas as pd
6 | from pandas.testing import assert_series_equal
7 |
8 | from gtime.preprocessing.time_series_conversion import (
9 | _SequenceToTimeIndexSeries,
10 | _PandasSeriesToTimeIndexSeries,
11 | _TimeIndexSeriesToPeriodIndexSeries,
12 | count_not_none,
13 | )
14 | from gtime.utils.testing_constants import DEFAULT_START, DEFAULT_FREQ
15 |
16 | PandasDate = Union[datetime, pd.Timestamp, str]
17 |
18 |
19 | def compare_output_of_input_sequence_to_expected_one(
20 | input_sequence, start, end, freq,
21 | ):
22 | computed_pandas_series = transform_sequence_into_time_index_series(
23 | input_sequence, start, end, freq
24 | )
25 | expected_pandas_series = pandas_series_with_period_index(
26 | input_sequence, start, end, freq
27 | )
28 | assert_series_equal(computed_pandas_series, expected_pandas_series)
29 |
30 |
31 | def compare_output_of_input_series_to_expected_one(
32 | input_sequence, start, end, freq,
33 | ):
34 | computed_pandas_series = transform_series_into_time_index_series(
35 | input_sequence, start, end, freq
36 | )
37 | expected_pandas_series = pandas_series_with_period_index(
38 | input_sequence.values, start, end, freq
39 | )
40 | assert_series_equal(computed_pandas_series, expected_pandas_series)
41 |
42 |
43 | def transform_sequence_into_time_index_series(
    array_like_object: Union[np.ndarray, list, pd.Series],
45 | start: Optional[str] = None,
46 | end: Optional[str] = None,
47 | freq: Optional[str] = None,
48 | ) -> pd.Series:
49 | time_series_conversion = _SequenceToTimeIndexSeries(start, end, freq)
50 | return time_series_conversion.transform(array_like_object)
51 |
52 |
53 | def transform_series_into_time_index_series(
    array_like_object: Union[np.ndarray, list, pd.Series],
55 | start: Optional[str] = None,
56 | end: Optional[str] = None,
57 | freq: Optional[str] = None,
58 | ) -> pd.Series:
59 | time_series_conversion = _PandasSeriesToTimeIndexSeries(start, end, freq)
60 | return time_series_conversion.transform(array_like_object)
61 |
62 |
63 | def transform_time_index_series_into_period_index_series(
64 | series: pd.Series, freq: pd.Timedelta = None,
65 | ) -> pd.Series:
66 | to_period_conversion = _TimeIndexSeriesToPeriodIndexSeries(freq=freq)
67 | return to_period_conversion.transform(series)
68 |
69 |
70 | def pandas_series_with_period_index(
    values: Union[np.ndarray, List[float]],
72 | start: Optional[datetime] = None,
73 | end: Optional[datetime] = None,
74 | freq: Optional[pd.Timedelta] = None,
75 | ) -> pd.Series:
76 | start, end, freq = _initialize_start_end_freq(start, end, freq)
77 | index = pd.period_range(start=start, end=end, periods=len(values), freq=freq,)
78 | return pd.Series(index=index, data=values, dtype=np.float64)
79 |
80 |
81 | def _initialize_start_end_freq(
82 | start: PandasDate, end: PandasDate, freq: pd.Timedelta
83 | ) -> Tuple[pd.Timestamp, pd.Timestamp, pd.Timedelta]:
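    """Fill in whichever of ``start``/``end``/``freq`` are missing with the
    test defaults, so that ``pd.period_range`` receives a valid combination."""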
84 | not_none_params = count_not_none(start, end, freq)
85 | if not_none_params == 0:
86 | start, end, freq = _default_params_initialization()
87 | elif not_none_params == 1:
88 | start, end, freq = _one_not_none_param_initialization(start, end, freq)
89 | elif not_none_params == 2:
90 | start, end, freq = _two_not_none_params_initialization(start, end, freq)
91 | else:
92 | raise ValueError(
93 | "Of the three parameters: start, end, and "
94 | "freq, exactly two must be specified"
95 | )
96 | return start, end, freq
97 |
98 |
99 | def _default_params_initialization() -> Tuple[pd.Timestamp, pd.Timestamp, pd.Timedelta]:
100 | start = DEFAULT_START
101 | end = None
102 | freq = DEFAULT_FREQ
103 | return start, end, freq
104 |
105 |
def _one_not_none_param_initialization(
    start, end, freq
) -> Tuple[pd.Timestamp, pd.Timestamp, pd.Timedelta]:
    # Exactly one of the three parameters is set; fill in the defaults and
    # leave the remaining parameter as None so that pandas infers it.
    if start is not None or end is not None:
        freq = DEFAULT_FREQ
    else:
        start = DEFAULT_START
    return start, end, freq
122 |
123 |
def _two_not_none_params_initialization(
    start, end, freq
) -> Tuple[pd.Timestamp, pd.Timestamp, pd.Timedelta]:
    # With two of the three parameters given, pandas can infer the third:
    # pass everything through unchanged.
    return start, end, freq
131 |
132 |
133 | def datetime_index_series_to_period_index_series(
134 | datetime_index_series: pd.Series, freq: Optional[pd.Timedelta] = None
135 | ) -> pd.Series:
    if datetime_index_series.index.freq is not None:
        return pd.Series(
            index=pd.PeriodIndex(datetime_index_series.index),
            data=datetime_index_series.values,
        )
145 | else:
146 | freq = "1D" if freq is None else freq
147 | return pd.Series(
148 | index=pd.PeriodIndex(datetime_index_series.index, freq=freq),
149 | data=datetime_index_series.values,
150 | )
151 |
152 |
153 | def timedelta_index_to_datetime(
154 | index: pd.TimedeltaIndex, start: datetime = datetime(year=1970, month=1, day=1),
155 | ) -> pd.DatetimeIndex:
156 | return start + index
157 |
158 |
159 | def timedelta_index_series_to_period_index_series(
160 | timedelta_index_series: pd.Series, freq: Optional[pd.Timedelta] = None
161 | ) -> pd.Series:
162 | datetime_index = timedelta_index_to_datetime(timedelta_index_series.index)
163 | if datetime_index.freq is None:
164 | freq = "1D" if freq is None else freq
165 | period_index = pd.PeriodIndex(datetime_index, freq=freq)
166 | else:
167 | period_index = pd.PeriodIndex(datetime_index)
168 | return pd.Series(index=period_index, data=timedelta_index_series.values)
169 |
--------------------------------------------------------------------------------
/gtime/time_series_models/simple_models.py:
--------------------------------------------------------------------------------
1 | from gtime.compose import FeatureCreation
2 | from sklearn.compose import make_column_selector
3 | from gtime.feature_extraction import Shift, MovingAverage, MovingCustomFunction
4 | from gtime.time_series_models import TimeSeriesForecastingModel
5 | from gtime.forecasting import (
6 | NaiveForecaster,
7 | SeasonalNaiveForecaster,
8 | DriftForecaster,
9 | AverageForecaster,
10 | )
11 |
12 |
13 | class Naive(TimeSeriesForecastingModel):
14 | """ Naive model pipeline, no feature creation and ``NaiveModel()`` as a model
15 |
16 | Parameters
17 | ----------
18 | horizon: int - prediction horizon, in time series periods
19 |
20 | Examples
21 | --------
22 | >>> import pandas as pd
23 | >>> import numpy as np
24 | >>> from gtime.time_series_models import Naive
25 | >>> idx = pd.period_range(start='2011-01-01', end='2012-01-01')
26 | >>> np.random.seed(0)
27 | >>> df = pd.DataFrame(np.random.random((len(idx), 1)), index=idx, columns=['1'])
28 | >>> model = Naive(horizon=4)
29 | >>> model.fit(df)
30 | >>> model.predict()
31 | y_1 y_2 y_3 y_4
32 | 2011-12-29 0.543806 0.543806 0.543806 0.543806
33 | 2011-12-30 0.456911 0.456911 0.456911 0.456911
34 | 2011-12-31 0.882041 0.882041 0.882041 0.882041
35 | 2012-01-01 0.458604 0.458604 0.458604 0.458604
36 | """
37 |
38 | def __init__(self, horizon: int):
39 | features = [
40 | ("s1", Shift(0), make_column_selector()),
41 | ]
42 | super().__init__(features=features, horizon=horizon, model=NaiveForecaster())
43 |
44 |
45 | class Average(TimeSeriesForecastingModel):
46 | """ Average model pipeline, no feature creation and ``AverageModel()`` as a model
47 |
48 | Parameters
49 | ----------
50 | horizon: int - prediction horizon, in time series periods
51 |
52 | Examples
53 | --------
54 | >>> import pandas as pd
55 | >>> import numpy as np
56 | >>> from gtime.time_series_models import Average
57 | >>> idx = pd.period_range(start='2011-01-01', end='2012-01-01')
58 | >>> np.random.seed(0)
59 | >>> df = pd.DataFrame(np.random.random((len(idx), 1)), index=idx, columns=['1'])
60 | >>> model = Average(horizon=5)
61 | >>> model.fit(df)
62 | >>> model.predict()
63 | y_1 y_2 y_3 y_4 y_5
64 | 2011-12-28 0.558475 0.558475 0.558475 0.558475 0.558475
65 | 2011-12-29 0.556379 0.556379 0.556379 0.556379 0.556379
66 | 2011-12-30 0.543946 0.543946 0.543946 0.543946 0.543946
67 | 2011-12-31 0.581512 0.581512 0.581512 0.581512 0.581512
68 | 2012-01-01 0.569221 0.569221 0.569221 0.569221 0.569221
69 |
70 | """
71 |
72 | def __init__(self, horizon: int):
73 | features = [
74 | ("s1", Shift(0), make_column_selector()),
75 | ]
76 | super().__init__(features=features, horizon=horizon, model=AverageForecaster())
77 |
78 |
79 | class SeasonalNaive(TimeSeriesForecastingModel):
80 | """ Seasonal naive model pipeline, no feature creation and ``SeasonalNaiveModel()`` as a model
81 |
82 | Parameters
83 | ----------
84 | horizon: int - prediction horizon, in time series periods
85 | seasonal_length: int - full season cycle length, in time series periods
86 |
87 | Examples
88 | --------
89 |
90 | >>> import pandas as pd
91 | >>> import numpy as np
92 | >>> from gtime.time_series_models import SeasonalNaive
93 | >>> idx = pd.period_range(start='2011-01-01', end='2012-01-01')
94 | >>> np.random.seed(0)
95 | >>> df = pd.DataFrame(np.random.random((len(idx), 1)), index=idx, columns=['1'])
96 | >>> model = SeasonalNaive(horizon=5, seasonal_length=4)
97 | >>> model.fit(df)
98 | >>> model.predict()
100 | y_1 y_2 y_3 y_4 y_5
101 | 2011-12-28 0.392676 0.956406 0.187131 0.128861 0.392676
102 | 2011-12-29 0.956406 0.187131 0.128861 0.392676 0.956406
103 | 2011-12-30 0.187131 0.128861 0.392676 0.956406 0.187131
104 | 2011-12-31 0.128861 0.392676 0.956406 0.187131 0.128861
105 | 2012-01-01 0.392676 0.956406 0.187131 0.128861 0.392676
106 | """
107 |
108 | def __init__(self, horizon: int, seasonal_length: int):
109 | features = [
110 | ("s1", Shift(0), make_column_selector()),
111 | ]
112 | self.seasonal_length = seasonal_length
113 | self.horizon = horizon
114 | super().__init__(
115 | features=features,
116 | horizon=horizon,
117 | model=SeasonalNaiveForecaster(seasonal_length),
118 | )
119 |
120 |
121 | class Drift(TimeSeriesForecastingModel):
122 | """ Simple drift model pipeline, no feature creation and ``DriftModel()`` as a model
123 |
124 | Parameters
125 | ----------
126 | horizon: int - prediction horizon, in time series periods
127 |
128 | Examples
129 | --------
130 |
131 | >>> import pandas as pd
132 | >>> import numpy as np
133 | >>> from gtime.time_series_models import Drift
134 | >>> idx = pd.period_range(start='2011-01-01', end='2012-01-01')
135 | >>> np.random.seed(0)
136 | >>> df = pd.DataFrame(np.random.random((len(idx), 1)), index=idx, columns=['1'])
137 | >>> model = Drift(horizon=5)
138 | >>> model.fit(df)
139 | >>> model.predict()
141 | y_1 y_2 y_3 y_4 y_5
142 | 2011-12-28 0.903984 0.902982 0.901980 0.900978 0.899976
143 | 2011-12-29 0.543806 0.542804 0.541802 0.540800 0.539798
144 | 2011-12-30 0.456911 0.455910 0.454908 0.453906 0.452904
145 | 2011-12-31 0.882041 0.881040 0.880038 0.879036 0.878034
146 | 2012-01-01 0.458604 0.457602 0.456600 0.455598 0.454596
147 |
148 | """
149 |
150 | def __init__(self, horizon: int):
151 | features = [
152 | ("s1", Shift(0), make_column_selector()),
153 | ]
154 | super().__init__(features=features, horizon=horizon, model=DriftForecaster())
155 |
--------------------------------------------------------------------------------
/gtime/time_series_models/tests/test_cv_pipeline.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pandas as pd
3 | import numpy as np
4 | from hypothesis import given, settings
5 | import hypothesis.strategies as st
6 | from gtime.time_series_models import CVPipeline
7 | from gtime.metrics import max_error, mae, rmse, log_mse
8 | from gtime.time_series_models import (
9 | AR,
10 | Naive,
11 | SeasonalNaive,
12 | TimeSeriesForecastingModel,
13 | )
14 | from gtime.feature_extraction import MovingAverage, Shift
15 | from gtime.forecasting import NaiveForecaster, DriftForecaster
16 |
17 |
18 | @st.composite
19 | def draw_unique_subset(draw, lst):
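    """Draw a non-empty selection of elements from ``lst``; elements may
    repeat, and callers collapse duplicates by building a dict of the result."""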
20 | return draw(st.lists(st.sampled_from(lst), min_size=1, max_size=len(lst)))
21 |
22 |
23 | @st.composite
24 | def naive_model(draw):
25 | horizon = draw(
26 | st.lists(
27 | st.integers(min_value=1, max_value=20), min_size=1, max_size=4, unique=True
28 | )
29 | )
30 | return (Naive, {"horizon": horizon})
31 |
32 |
33 | @st.composite
34 | def seasonal_naive_model(draw):
35 | horizon = draw(
36 | st.lists(
37 | st.integers(min_value=1, max_value=20), min_size=1, max_size=4, unique=True
38 | )
39 | )
40 | seasonal_length = draw(
41 | st.lists(
42 | st.integers(min_value=1, max_value=10), min_size=1, max_size=4, unique=True
43 | )
44 | )
45 | return (SeasonalNaive, {"horizon": horizon, "seasonal_length": seasonal_length})
46 |
47 |
48 | @st.composite
49 | def ar_model(draw):
50 | horizon = draw(
51 | st.lists(
52 | st.integers(min_value=1, max_value=20), min_size=1, max_size=4, unique=True
53 | )
54 | )
55 | p = draw(
56 | st.lists(
57 | st.integers(min_value=1, max_value=20), min_size=1, max_size=4, unique=True
58 | )
59 | )
60 | explainer = draw(st.sampled_from([None, "lime", "shap"]))
61 | return (AR, {"horizon": horizon, "p": p, "explainer_type": [explainer]})
62 |
63 |
64 | @st.composite
65 | def models_grid(draw):
66 | model_list = [draw(ar_model()), draw(seasonal_naive_model()), draw(naive_model())]
67 | return dict(draw(draw_unique_subset(model_list)))
68 |
69 |
70 | @st.composite
71 | def metrics(draw):
72 | metric_list = [max_error, mae, rmse, log_mse]
73 | metrics = draw(draw_unique_subset(metric_list))
74 | metrics_dict = dict(zip([x.__name__ for x in metrics], metrics))
75 | return metrics_dict
76 |
77 |
78 | class TestCVPipeline:
79 | @given(
80 | models=models_grid(),
81 | n_splits=st.integers(min_value=2, max_value=10),
82 | blocking=st.booleans(),
83 | metrics=metrics(),
84 | )
85 | def test_constructor(self, models, n_splits, blocking, metrics):
86 | cv_pipeline = CVPipeline(
87 | models_sets=models, n_splits=n_splits, blocking=blocking, metrics=metrics
88 | )
89 | list_len = np.sum(
90 | [np.prod([len(y) for y in x.values()]) for x in models.values()]
91 | )
92 | assert list_len == len(cv_pipeline.model_list)
93 | assert len(metrics) == len(cv_pipeline.metrics)
94 |
95 | @pytest.mark.parametrize(
96 | "models", [{Naive: {"horizon": [3]}, AR: {"horizon": [3], "p": [2, 3]}}]
97 | )
98 | @pytest.mark.parametrize("metrics", [{"RMSE": rmse, "MAE": mae}])
99 | @pytest.mark.parametrize("n_splits", [3, 5])
100 | @pytest.mark.parametrize("blocking", [True, False])
101 | @pytest.mark.parametrize("seed", [5, 1000])
102 | def test_fit_predict(self, models, n_splits, blocking, metrics, seed):
103 | cv_pipeline = CVPipeline(
104 | models_sets=models, n_splits=n_splits, blocking=blocking, metrics=metrics
105 | )
106 | np.random.seed(seed)
107 | idx = pd.period_range(start="2011-01-01", end="2012-01-01")
108 | df = pd.DataFrame(
109 | np.random.standard_normal((len(idx), 1)), index=idx, columns=["1"]
110 | )
111 | cv_pipeline.fit(df)
112 | assert cv_pipeline.cv_results_.shape == (
113 | len(cv_pipeline.model_list) * len(metrics),
114 | 4,
115 | )
116 | y_pred = cv_pipeline.predict()
117 | horizon = cv_pipeline.best_model_.horizon
118 | assert y_pred.shape == (horizon, horizon)
119 |
120 | @pytest.mark.parametrize(
121 | "models",
122 | [
123 | {
124 | TimeSeriesForecastingModel: {
125 | "features": [
126 | [("s3", Shift(1), ["1"])],
127 | [("ma10", MovingAverage(10), ["1"])],
128 | ],
129 | "horizon": [4],
130 | "model": [NaiveForecaster(), DriftForecaster()],
131 | }
132 | }
133 | ],
134 | )
135 | @pytest.mark.parametrize("metrics", [{"RMSE": rmse, "MAE": mae}])
136 | @pytest.mark.parametrize("n_splits", [5])
137 | def test_model_assembly(self, models, n_splits, metrics):
138 | cv_pipeline = CVPipeline(models_sets=models, n_splits=n_splits, metrics=metrics)
139 | idx = pd.period_range(start="2011-01-01", end="2012-01-01")
140 | df = pd.DataFrame(
141 | np.random.standard_normal((len(idx), 1)), index=idx, columns=["1"]
142 | )
143 | cv_pipeline.fit(df)
144 | assert cv_pipeline.cv_results_.shape == (
145 | len(cv_pipeline.model_list) * len(metrics),
146 | 4,
147 | )
148 | y_pred = cv_pipeline.predict()
149 | horizon = cv_pipeline.best_model_.horizon
150 | assert y_pred.shape == (horizon, horizon)
151 |
152 | @pytest.mark.parametrize(
153 | "models", [{Naive: {"horizon": [3]}, AR: {"horizon": [3], "p": [2, 3]}}]
154 | )
155 | @pytest.mark.parametrize("refit", ["all", "best", ["Naive: {'horizon': 3}"]])
156 | def test_models_refit(self, models, refit):
157 | cv_pipeline = CVPipeline(models_sets=models)
158 | idx = pd.period_range(start="2011-01-01", end="2012-01-01")
159 | df = pd.DataFrame(
160 | np.random.standard_normal((len(idx), 1)), index=idx, columns=["1"]
161 | )
162 | cv_pipeline.fit(df, refit=refit)
163 | assert cv_pipeline.cv_results_.shape == (len(cv_pipeline.model_list), 4,)
164 | y_pred = cv_pipeline.predict()
165 | horizon = cv_pipeline.best_model_.horizon
166 | assert y_pred.shape == (horizon, horizon)
167 |
--------------------------------------------------------------------------------
/gtime/feature_generation/tests/test_external.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import pytest
3 | from hypothesis import given, strategies as st
4 |
5 | if pd.__version__ >= "1.0.0":
6 | import pandas._testing as testing
7 | else:
8 | import pandas.util.testing as testing
9 |
10 | from gtime.feature_generation import Constant, PeriodicSeasonal
11 | from gtime.utils.hypothesis.time_indexes import giotto_time_series
12 |
13 |
class TestPeriodicSeasonalFeature:
15 | def test_missing_start_date_or_period(self):
16 | periodic_feature = PeriodicSeasonal()
17 | with pytest.raises(ValueError):
18 | periodic_feature.transform()
19 |
20 | periodic_feature = PeriodicSeasonal(index_period=1)
21 | with pytest.raises(ValueError):
22 | periodic_feature.transform()
23 |
24 | periodic_feature = PeriodicSeasonal(start_date="2010-01-01")
25 | with pytest.raises(ValueError):
26 | periodic_feature.transform()
27 |
28 | def test_string_period(self):
29 | testing.N, testing.K = 20, 1
30 | ts = testing.makeTimeDataFrame(freq="s")
31 | periodic_feature = PeriodicSeasonal(period="1 days")
32 | periodic_feature.transform(ts)
33 |
34 | assert type(periodic_feature.period) == pd.Timedelta
35 |
36 | def test_correct_start_date(self):
37 | testing.N, testing.K = 20, 1
38 | ts = testing.makeTimeDataFrame(freq="s")
39 | start_date = "2018-01-01"
40 | periodic_feature = PeriodicSeasonal(period="1 days", start_date=start_date)
41 | periodic_feature.transform(ts)
42 |
43 | assert periodic_feature.start_date == ts.index.values[0]
44 |
45 | periodic_feature = PeriodicSeasonal(
46 | period="3 days", index_period=10, start_date=start_date
47 | )
48 | periodic_feature.transform()
49 | assert periodic_feature.start_date == pd.to_datetime(start_date)
50 |
51 | start_date = pd.to_datetime("2018-01-01")
52 | periodic_feature = PeriodicSeasonal(
53 | period="3 days", index_period=10, start_date=start_date
54 | )
55 | periodic_feature.transform()
56 | assert periodic_feature.start_date == start_date
57 |
58 | def test_too_high_sampling_frequency(self):
59 | start_date = "2018-01-01"
60 | periodic_feature = PeriodicSeasonal(
61 | period="2 days",
62 | start_date=start_date,
63 | index_period=pd.date_range(start=start_date, end="2020-01-01", freq="W"),
64 | )
65 | with pytest.raises(ValueError):
66 | periodic_feature.transform()
67 |
68 | def test_correct_sinusoide(self):
69 | testing.N, testing.K = 30, 1
70 | ts = testing.makeTimeDataFrame(freq="MS")
71 | start_date = "2018-01-01"
72 | periodic_feature = PeriodicSeasonal(
73 | period="365 days",
74 | start_date=start_date,
75 | index_period=pd.date_range(start=start_date, end="2020-01-01", freq="W"),
76 | )
77 | output_sin = periodic_feature.transform(ts)
78 | expected_index = pd.DatetimeIndex(
79 | [
80 | "2000-01-01",
81 | "2000-02-01",
82 | "2000-03-01",
83 | "2000-04-01",
84 | "2000-05-01",
85 | "2000-06-01",
86 | "2000-07-01",
87 | "2000-08-01",
88 | "2000-09-01",
89 | "2000-10-01",
90 | "2000-11-01",
91 | "2000-12-01",
92 | "2001-01-01",
93 | "2001-02-01",
94 | "2001-03-01",
95 | "2001-04-01",
96 | "2001-05-01",
97 | "2001-06-01",
98 | "2001-07-01",
99 | "2001-08-01",
100 | "2001-09-01",
101 | "2001-10-01",
102 | "2001-11-01",
103 | "2001-12-01",
104 | "2002-01-01",
105 | "2002-02-01",
106 | "2002-03-01",
107 | "2002-04-01",
108 | "2002-05-01",
109 | "2002-06-01",
110 | ],
111 | dtype="datetime64[ns]",
112 | freq="MS",
113 | )
114 | expected_df = pd.DataFrame.from_dict(
115 | {
116 | f"0__{periodic_feature.__class__.__name__}": [
117 | 0.0,
118 | 0.25433547,
119 | 0.42938198,
120 | 0.49999537,
121 | 0.43585316,
122 | 0.25062091,
123 | 0.0043035,
124 | -0.25062091,
125 | -0.43585316,
126 | -0.49999537,
127 | -0.42938198,
128 | -0.24688778,
129 | 0.00860668,
130 | 0.2617078,
131 | 0.42938198,
132 | 0.49999537,
133 | 0.43585316,
134 | 0.25062091,
135 | 0.0043035,
136 | -0.25062091,
137 | -0.43585316,
138 | -0.49999537,
139 | -0.42938198,
140 | -0.24688778,
141 | 0.00860668,
142 | 0.2617078,
143 | 0.42938198,
144 | 0.49999537,
145 | 0.43585316,
146 | 0.25062091,
147 | ]
148 | }
149 | )
150 | expected_df.index = expected_index
151 | pd.testing.assert_frame_equal(output_sin, expected_df)
152 |
153 |
154 | class TestConstantFeature:
155 | def test_correct_constant_feature(self):
156 | constant = 12
157 | df = pd.DataFrame.from_dict({"old_name": [0, 1, 2, 3, 4, 5]})
158 |
159 | constant_feature = Constant(constant=constant)
160 |
161 | df_constant = constant_feature.fit_transform(df)
162 | expected_df_constant = pd.DataFrame.from_dict(
163 | {
164 | f"0__{constant_feature.__class__.__name__}": [
165 | constant,
166 | constant,
167 | constant,
168 | constant,
169 | constant,
170 | constant,
171 | ]
172 | }
173 | )
174 |
175 | testing.assert_frame_equal(expected_df_constant, df_constant, check_dtype=False)
176 |
177 | @given(
178 | giotto_time_series(
179 | min_length=1,
180 | start_date=pd.Timestamp(2000, 1, 1),
181 | end_date=pd.Timestamp(2010, 1, 1),
182 | ),
183 | st.integers(0, 100),
184 | )
    def test_random_ts_and_constant(self, df: pd.DataFrame, constant: int):
        constant_feature = Constant(constant=constant)
        df_constant = constant_feature.fit_transform(df)

        # The output keeps the input's length and contains only the constant
        assert df_constant.shape == (len(df), 1)
        assert (df_constant == constant).all().all()
191 |
--------------------------------------------------------------------------------
/gtime/feature_extraction/custom.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
2 | from sklearn.utils.validation import check_is_fitted
3 |
4 | from ..base import add_class_name
5 | from gtime.feature_extraction import MovingCustomFunction
6 |
7 |
8 | class CrestFactorDetrending(MovingCustomFunction):
9 | """Crest factor detrending model.
10 | This class removes the trend from the data by using the crest factor definition.
11 | Each sample is normalize by its weighted surrounding.
12 | Generalized detrending is defined in (eq. 1) of: H. P. Tukuljac, V. Pulkki,
13 | H. Gamper, K. Godin, I. J. Tashev and N. Raghuvanshi, "A Sparsity Measure for Echo
14 | Density Growth in General Environments," ICASSP 2019 - 2019 IEEE International
15 | Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, United
16 | Kingdom, 2019, pp. 1-5.
17 | Parameters
18 | ----------
19 | window_size : int, optional, default: ``1``
20 | The number of previous points on which to compute the crest factor detrending.
21 | is_causal : bool, optional, default: ``True``
        Whether the current sample is computed based only on the past or also on the future.

    Examples
    --------
    >>> import pandas as pd
    >>> from gtime.feature_extraction import CrestFactorDetrending
26 | >>> ts = pd.DataFrame([0, 1, 2, 3, 4, 5])
27 | >>> gnrl_dtr = CrestFactorDetrending(window_size=2)
28 | >>> gnrl_dtr.fit_transform(ts)
29 | 0__CrestFactorDetrending
30 | 0 NaN
31 | 1 1.000000
32 | 2 0.800000
33 | 3 0.692308
34 | 4 0.640000
35 | 5 0.609756
37 | """
38 |
    def __init__(self, window_size: int = 1, is_causal: bool = True):
        def detrend(signal):
            N = 2
            signal = np.array(signal)
            large_signal_segment = signal ** N
46 | large_segment_mean = np.sum(large_signal_segment)
47 | if self.is_causal:
48 | ref_index = -1
49 | else:
50 | ref_index = int(len(signal) / 2)
51 | small_signal_segment = signal[ref_index] ** N
52 | return small_signal_segment / large_segment_mean # (eq. 1)
53 |
54 | super().__init__(detrend)
55 | self.window_size = window_size
56 | self.is_causal = is_causal
57 |
58 | @add_class_name
59 | def transform(self, time_series: pd.DataFrame) -> pd.DataFrame:
60 | """For every row of ``time_series``, compute the moving crest factor detrending function of the
61 | previous ``window_size`` elements.
62 | Parameters
63 | ----------
64 | time_series : pd.DataFrame, shape (n_samples, 1), required
65 | The DataFrame on which to compute the rolling moving custom function.
66 | Returns
67 | -------
68 | time_series_t : pd.DataFrame, shape (n_samples, 1)
69 | A DataFrame, with the same length as ``time_series``, containing the rolling
70 | moving custom function for each element.
71 | """
72 | check_is_fitted(self)
73 |
74 | if self.is_causal:
75 | time_series_mvg_dtr = time_series.rolling(self.window_size).apply(
76 | self.custom_feature_function, raw=self.raw
77 | )
78 | else:
79 | time_series_mvg_dtr = time_series.rolling(
80 | self.window_size, min_periods=int(self.window_size / 2)
81 | ).apply(self.custom_feature_function, raw=self.raw)
82 | time_series_mvg_dtr = time_series_mvg_dtr.dropna()
83 |
84 | time_series_t = time_series_mvg_dtr
85 | return time_series_t
86 |
87 |
88 | class SortedDensity(MovingCustomFunction):
89 | """For each row in ``time_series``, compute the sorted density function of the
90 | previous ``window_size`` rows. If there are not enough rows, the value is ``Nan``.
    Sorted density measure is defined in (eq. 2) of: H. P. Tukuljac, V. Pulkki,
92 | H. Gamper, K. Godin, I. J. Tashev and N. Raghuvanshi, "A Sparsity Measure for Echo
93 | Density Growth in General Environments," ICASSP 2019 - 2019 IEEE International
94 | Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, United
95 | Kingdom, 2019, pp. 1-5.
96 | Parameters
97 | ----------
98 | window_size : int, optional, default: ``1``
99 | The number of previous points on which to compute the sorted density.
100 | is_causal : bool, optional, default: ``True``
101 | Whether the current sample is computed based only on the past or also on the future.
102 | Examples
103 | --------
104 | >>> import pandas as pd
105 | >>> from gtime.feature_extraction import SortedDensity
106 | >>> ts = pd.DataFrame([0, 1, 2, 3, 4, 5])
107 | >>> mv_avg = SortedDensity(window_size=2)
108 | >>> mv_avg.fit_transform(ts)
109 | 0__SortedDensity
110 | 0 NaN
111 | 1 0.500000
112 | 2 0.666667
113 | 3 0.700000
114 | 4 0.714286
115 | 5 0.722222
117 | """
118 |
    def __init__(self, window_size: int = 1, is_causal: bool = True):
        def sorted_density(signal):
123 | t = np.array(range(len(signal))) + 1
124 | signal = signal[signal.argsort()[::-1]]
125 | t = np.reshape(t, signal.shape)
126 | SD = np.sum(np.multiply(t, signal)) / np.sum(signal) # (eq. 2)
127 | SD = SD / (len(signal))
128 | return SD
129 |
130 | super().__init__(sorted_density)
131 | self.window_size = window_size
132 | self.is_causal = is_causal
133 |
134 | @add_class_name
135 | def transform(self, time_series: pd.DataFrame) -> pd.DataFrame:
136 | """For every row of ``time_series``, compute the moving sorted density function of the
137 | previous ``window_size`` elements.
138 | Parameters
139 | ----------
140 | time_series : pd.DataFrame, shape (n_samples, 1), required
141 | The DataFrame on which to compute the rolling moving custom function.
142 | Returns
143 | -------
144 | time_series_t : pd.DataFrame, shape (n_samples, 1)
145 | A DataFrame, with the same length as ``time_series``, containing the rolling
146 | moving custom function for each element.
147 | """
148 | check_is_fitted(self)
149 |
150 | if self.is_causal:
151 | time_series_mvg_sd = time_series.rolling(self.window_size).apply(
152 | self.custom_feature_function, raw=self.raw
153 | )
154 | else:
155 | time_series_mvg_sd = time_series.rolling(
156 | self.window_size, min_periods=int(self.window_size / 2)
157 | ).apply(self.custom_feature_function, raw=self.raw)
158 | time_series_mvg_sd = time_series_mvg_sd.dropna()
159 |
160 | time_series_t = time_series_mvg_sd
161 | return time_series_t
162 |
--------------------------------------------------------------------------------