├── skpro
├── utils
│ ├── _maint
│ │ ├── __init__.py
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ └── test_show_versions.py
│ │ └── _show_versions.py
│ ├── tests
│ │ ├── __init__.py
│ │ └── test_plots.py
│ ├── __init__.py
│ ├── deep_equals
│ │ └── __init__.py
│ ├── index.py
│ ├── numpy.py
│ ├── pandas.py
│ ├── sklearn.py
│ ├── random_state.py
│ ├── utils.py
│ ├── _doctest.py
│ └── retrieval.py
├── datatypes
│ ├── _adapter
│ │ └── __init__.py
│ ├── tests
│ │ └── __init__.py
│ ├── _convert_utils
│ │ ├── __init__.py
│ │ ├── _coerce.py
│ │ └── _convert.py
│ ├── _base
│ │ └── __init__.py
│ ├── _table
│ │ ├── __init__.py
│ │ ├── _registry.py
│ │ └── _base.py
│ ├── _proba
│ │ ├── __init__.py
│ │ └── _registry.py
│ ├── __init__.py
│ └── _common.py
├── regression
│ ├── parametric
│ │ └── __init__.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test_glum.py
│ │ ├── test_ondil.py
│ │ ├── test_glm.py
│ │ └── test_cyclic_boosting.py
│ ├── adapters
│ │ ├── __init__.py
│ │ ├── ngboost
│ │ │ └── __init__.py
│ │ └── sklearn
│ │ │ └── __init__.py
│ ├── gam
│ │ └── __init__.py
│ ├── gp
│ │ └── __init__.py
│ ├── jackknife
│ │ └── __init__.py
│ ├── binned
│ │ └── __init__.py
│ ├── compose
│ │ └── __init__.py
│ ├── base
│ │ ├── adapters
│ │ │ ├── __init__.py
│ │ │ └── _sklearn.py
│ │ └── __init__.py
│ ├── ensemble
│ │ └── __init__.py
│ ├── online
│ │ └── __init__.py
│ ├── bayesian
│ │ └── __init__.py
│ ├── __init__.py
│ ├── conformal
│ │ └── __init__.py
│ └── linear
│ │ └── __init__.py
├── tests
│ ├── __init__.py
│ ├── tests
│ │ └── __init__.py
│ ├── scenarios
│ │ └── __init__.py
│ ├── _config_test_dummy.py
│ ├── _config.py
│ ├── utils.py
│ └── _test_vm.py
├── benchmarking
│ ├── __init__.py
│ └── tests
│ │ └── __init__.py
├── metrics
│ ├── tests
│ │ ├── __init__.py
│ │ └── test_distr_metrics.py
│ ├── survival
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ └── test_c_harrell.py
│ │ └── __init__.py
│ ├── __init__.py
│ └── _coerce.py
├── registry
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test_tags.py
│ │ └── test_scitype.py
│ ├── __init__.py
│ └── _scitype.py
├── survival
│ ├── __init__.py
│ ├── tree
│ │ └── __init__.py
│ ├── additive
│ │ └── __init__.py
│ ├── adapters
│ │ └── __init__.py
│ ├── coxph
│ │ └── __init__.py
│ ├── compose
│ │ └── __init__.py
│ ├── aft
│ │ └── __init__.py
│ ├── ensemble
│ │ └── __init__.py
│ └── base.py
├── distributions
│ ├── base
│ │ ├── tests
│ │ │ ├── __init__.py
│ │ │ └── test_multiindex.py
│ │ └── __init__.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test_hurdle.py
│ │ ├── test_empirical.py
│ │ └── test_qpd.py
│ ├── adapters
│ │ ├── __init__.py
│ │ ├── scipy
│ │ │ ├── tests
│ │ │ │ └── __init__.py
│ │ │ ├── __init__.py
│ │ │ ├── _empirical.py
│ │ │ └── _distribution.py
│ │ └── statsmodels
│ │ │ ├── __init__.py
│ │ │ └── _empirical.py
│ ├── compose
│ │ └── __init__.py
│ ├── trafo
│ │ └── __init__.py
│ ├── poisson.py
│ ├── negative_binomial.py
│ ├── geometric.py
│ ├── binomial.py
│ ├── left_truncated.py
│ ├── erlang.py
│ ├── loggamma.py
│ ├── fisk.py
│ ├── halfnormal.py
│ ├── inversegamma.py
│ ├── alpha.py
│ ├── loglaplace.py
│ ├── halfcauchy.py
│ ├── halflogistic.py
│ ├── inversegaussian.py
│ ├── __init__.py
│ ├── exponential.py
│ └── truncated_normal.py
├── __init__.py
├── model_selection
│ └── __init__.py
└── base
│ ├── __init__.py
│ └── _base.py
├── docs
├── _static
│ ├── .gitignore
│ ├── base_api.png
│ ├── overview.png
│ ├── logo
│ │ ├── logo.png
│ │ └── skpro-banner.png
│ ├── parametric.png
│ ├── pymc_example_plot.png
│ └── simple_example_plot.png
├── source
│ ├── about
│ │ ├── contributors.md
│ │ ├── roadmap.rst
│ │ ├── team.rst
│ │ ├── mission.rst
│ │ ├── governance.rst
│ │ └── history.rst
│ ├── contribute
│ │ ├── team.rst
│ │ └── code_of_conduct.rst
│ ├── images
│ │ └── skpro-banner.png
│ ├── related_software.rst
│ ├── _static
│ │ ├── class.rst
│ │ ├── function.rst
│ │ └── class_with_call.rst
│ ├── api_reference
│ │ ├── base.rst
│ │ ├── metrics.rst
│ │ ├── utils.rst
│ │ └── distributions.rst
│ ├── developer_guide
│ │ ├── add_estimators.rst
│ │ └── reviewer_guide.rst
│ ├── tutorials.rst
│ ├── api_reference.rst
│ ├── get_started.rst
│ ├── includes
│ │ └── api_css.rst
│ ├── user_guide.rst
│ ├── contribute.rst
│ ├── users.rst
│ ├── developer_guide.rst
│ ├── about.rst
│ └── index.rst
└── Makefile
├── .github
├── workflows
│ ├── cancel.yml
│ ├── dependency-review.yml
│ ├── update_contributors.yml
│ └── wheels.yml
├── ISSUE_TEMPLATE
│ ├── report-all-other-issues-or-questions.md
│ ├── maintenance-issue.md
│ ├── documentation-issue.md
│ ├── feature_request.md
│ └── bug_report.md
├── dependabot.yml
└── .codecov.yml
├── AUTHORS.rst
├── .readthedocs.yml
├── .coveragerc
├── CITATION.rst
├── CODEOWNERS
├── .gitignore
├── .binder
└── Dockerfile
├── conftest.py
├── setup.cfg
├── CONTRIBUTING.md
├── LICENSE.txt
├── Makefile
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
└── pyproject.toml
/skpro/utils/_maint/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/skpro/datatypes/_adapter/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/skpro/regression/parametric/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/_static/.gitignore:
--------------------------------------------------------------------------------
1 | # Empty directory
2 |
--------------------------------------------------------------------------------
/skpro/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for skpro package."""
2 |
--------------------------------------------------------------------------------
/skpro/utils/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for utilities."""
2 |
--------------------------------------------------------------------------------
/skpro/benchmarking/__init__.py:
--------------------------------------------------------------------------------
1 | """Benchmarking and evaluation."""
2 |
--------------------------------------------------------------------------------
/skpro/datatypes/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for data types module."""
2 |
--------------------------------------------------------------------------------
/skpro/tests/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for the test utilities."""
2 |
--------------------------------------------------------------------------------
/skpro/metrics/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for probabilistic metrics."""
2 |
--------------------------------------------------------------------------------
/skpro/tests/scenarios/__init__.py:
--------------------------------------------------------------------------------
1 | """Test scenarios for estimators."""
2 |
--------------------------------------------------------------------------------
/docs/source/about/contributors.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../../CONTRIBUTORS.md
2 | ```
3 |
--------------------------------------------------------------------------------
/skpro/utils/_maint/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for the show_versions utility."""
2 |
--------------------------------------------------------------------------------
/skpro/benchmarking/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for benchmarking and evaluation."""
2 |
--------------------------------------------------------------------------------
/skpro/datatypes/_convert_utils/__init__.py:
--------------------------------------------------------------------------------
1 | """Conversion auxiliary utilities."""
2 |
--------------------------------------------------------------------------------
/skpro/registry/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for registry and lookup functionality."""
2 |
--------------------------------------------------------------------------------
/skpro/survival/__init__.py:
--------------------------------------------------------------------------------
1 | """Survival or time-to-event prediction estimators."""
2 |
--------------------------------------------------------------------------------
/docs/source/contribute/team.rst:
--------------------------------------------------------------------------------
1 | .. _contrib_team:
2 |
3 | .. include:: ../about/team.rst
4 |
--------------------------------------------------------------------------------
/skpro/distributions/base/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for skpro distribution base class."""
2 |
--------------------------------------------------------------------------------
/skpro/regression/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for probabilistic supervised regressors."""
2 |
--------------------------------------------------------------------------------
/docs/_static/base_api.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/base_api.png
--------------------------------------------------------------------------------
/docs/_static/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/overview.png
--------------------------------------------------------------------------------
/skpro/distributions/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for skpro probability distribution objects."""
2 |
--------------------------------------------------------------------------------
/docs/_static/logo/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/logo/logo.png
--------------------------------------------------------------------------------
/docs/_static/parametric.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/parametric.png
--------------------------------------------------------------------------------
/docs/_static/logo/skpro-banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/logo/skpro-banner.png
--------------------------------------------------------------------------------
/docs/_static/pymc_example_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/pymc_example_plot.png
--------------------------------------------------------------------------------
/docs/source/images/skpro-banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/source/images/skpro-banner.png
--------------------------------------------------------------------------------
/docs/_static/simple_example_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/simple_example_plot.png
--------------------------------------------------------------------------------
/docs/source/related_software.rst:
--------------------------------------------------------------------------------
1 | .. _related_software:
2 |
3 | ================
4 | Related Software
5 | ================
6 |
7 | TODO
8 |
--------------------------------------------------------------------------------
/skpro/regression/adapters/__init__.py:
--------------------------------------------------------------------------------
1 | """Adapters for probabilistic regressors."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
--------------------------------------------------------------------------------
/skpro/__init__.py:
--------------------------------------------------------------------------------
1 | """skpro."""
2 |
3 | __version__ = "2.11.0"
4 |
5 | __all__ = ["show_versions"]
6 |
7 | from skpro.utils._maint._show_versions import show_versions
8 |
--------------------------------------------------------------------------------
/skpro/distributions/adapters/__init__.py:
--------------------------------------------------------------------------------
1 | """Adapters for probability distribution objects."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
--------------------------------------------------------------------------------
/skpro/survival/tree/__init__.py:
--------------------------------------------------------------------------------
1 | """Survival tree models."""
2 |
3 | from skpro.survival.tree._tree_sksurv import SurvivalTree
4 |
5 | __all__ = ["SurvivalTree"]
6 |
--------------------------------------------------------------------------------
/skpro/metrics/survival/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for metrics for time-to-event or survival prediction."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
--------------------------------------------------------------------------------
/skpro/survival/additive/__init__.py:
--------------------------------------------------------------------------------
1 | """Generalized additive survival models."""
2 |
3 | __all__ = ["AalenAdditive"]
4 |
5 | from skpro.survival.additive._aalen_lifelines import AalenAdditive
6 |
--------------------------------------------------------------------------------
/skpro/distributions/adapters/scipy/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for adapters for probability distribution objects, scipy facing."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
--------------------------------------------------------------------------------
/skpro/model_selection/__init__.py:
--------------------------------------------------------------------------------
1 | """Tuning and model selection."""
2 |
3 | __all__ = ["GridSearchCV", "RandomizedSearchCV"]
4 |
5 | from skpro.model_selection._tuning import GridSearchCV, RandomizedSearchCV
6 |
--------------------------------------------------------------------------------
/skpro/datatypes/_base/__init__.py:
--------------------------------------------------------------------------------
1 | """Base module for datatypes."""
2 |
3 | from skpro.datatypes._base._base import BaseConverter, BaseDatatype, BaseExample
4 |
5 | __all__ = ["BaseConverter", "BaseDatatype", "BaseExample"]
6 |
--------------------------------------------------------------------------------
/docs/source/_static/class.rst:
--------------------------------------------------------------------------------
1 | {{objname}}
2 | {{ underline }}==============
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 |
8 | .. raw:: html
9 |
10 |
11 |
--------------------------------------------------------------------------------
/skpro/regression/gam/__init__.py:
--------------------------------------------------------------------------------
1 | """GAM regressor using pyGAM."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | from skpro.regression.gam._gam import GAMRegressor
5 |
6 | __all__ = ["GAMRegressor"]
7 |
--------------------------------------------------------------------------------
/docs/source/_static/function.rst:
--------------------------------------------------------------------------------
1 | {{objname}}
2 | {{ underline }}====================
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autofunction:: {{ objname }}
7 |
8 | .. raw:: html
9 |
10 |
11 |
--------------------------------------------------------------------------------
/skpro/regression/gp/__init__.py:
--------------------------------------------------------------------------------
1 | """Gaussian process models."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | from skpro.regression.gp._sklearn import GaussianProcess
5 |
6 | __all__ = ["GaussianProcess"]
7 |
--------------------------------------------------------------------------------
/skpro/regression/jackknife/__init__.py:
--------------------------------------------------------------------------------
1 | """MAPIE Jackknife Regressors."""
2 |
3 | from skpro.regression.jackknife._mapie_jackknife import (
4 | MapieJackknifeAfterBootstrapRegressor,
5 | )
6 |
7 | __all__ = ["MapieJackknifeAfterBootstrapRegressor"]
8 |
--------------------------------------------------------------------------------
/skpro/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """Utility functionality."""
2 |
3 | from skpro.utils.deep_equals import deep_equals
4 | from skpro.utils.estimator_checks import check_estimator
5 |
6 | __all__ = [
7 | "check_estimator",
8 | "deep_equals",
9 | ]
10 |
--------------------------------------------------------------------------------
/skpro/distributions/compose/__init__.py:
--------------------------------------------------------------------------------
1 | """Probability distribution objects."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 | # adapted from sktime
4 |
5 | __all__ = ["IID"]
6 |
7 | from skpro.distributions.compose._iid import IID
8 |
--------------------------------------------------------------------------------
/skpro/utils/deep_equals/__init__.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Module for nested equality checking."""
3 | from skpro.utils.deep_equals._deep_equals import deep_equals
4 |
5 | __all__ = [
6 | "deep_equals",
7 | ]
8 |
--------------------------------------------------------------------------------
/docs/source/_static/class_with_call.rst:
--------------------------------------------------------------------------------
1 | {{objname}}
2 | {{ underline }}===============
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 | :special-members: __call__
8 |
9 | .. raw:: html
10 |
11 |
12 |
--------------------------------------------------------------------------------
/skpro/tests/_config_test_dummy.py:
--------------------------------------------------------------------------------
1 | """Test dummy for testing config skips."""
2 |
3 |
4 | from skpro.regression.base import BaseProbaRegressor # noqa: E402
5 |
6 |
7 | class DummySkipped(BaseProbaRegressor):
8 | """Dummy regressor to test exclusion."""
9 |
10 | pass
11 |
--------------------------------------------------------------------------------
/skpro/survival/adapters/__init__.py:
--------------------------------------------------------------------------------
1 | """Module containing adapters to other framework packages covering multiple tasks."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | __all__ = ["_SksurvAdapter"]
5 |
6 | from skpro.survival.adapters.sksurv import _SksurvAdapter
7 |
--------------------------------------------------------------------------------
/skpro/regression/binned/__init__.py:
--------------------------------------------------------------------------------
1 | """Reduction to probabilistic classification."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | from skpro.regression.binned._sklearn_bin_regressor import HistBinnedProbaRegressor
5 |
6 | __all__ = ["HistBinnedProbaRegressor"]
7 |
--------------------------------------------------------------------------------
/skpro/distributions/trafo/__init__.py:
--------------------------------------------------------------------------------
1 | """Probability distribution objects."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 | # adapted from sktime
4 |
5 | __all__ = ["TransformedDistribution"]
6 |
7 | from skpro.distributions.trafo._transformed import TransformedDistribution
8 |
--------------------------------------------------------------------------------
/skpro/regression/adapters/ngboost/__init__.py:
--------------------------------------------------------------------------------
1 | """Adapters for probabilistic regressors, towards ngboost."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | from skpro.regression.adapters.ngboost._ngboost_proba import NGBoostAdapter
5 |
6 | __all__ = ["NGBoostAdapter"]
7 |
--------------------------------------------------------------------------------
/skpro/regression/adapters/sklearn/__init__.py:
--------------------------------------------------------------------------------
1 | """Adapters for probabilistic regressors, towards sklearn."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | from skpro.regression.adapters.sklearn._sklearn_proba import SklearnProbaReg
5 |
6 | __all__ = ["SklearnProbaReg"]
7 |
--------------------------------------------------------------------------------
/docs/source/about/roadmap.rst:
--------------------------------------------------------------------------------
1 | .. _roadmap:
2 |
3 | =======
4 | Roadmap
5 | =======
6 |
7 | Welcome to ``skpro``'s roadmap.
8 |
9 | .. note::
10 |
11 | The project is under active planning and development. We will continue to update
12 | our roadmap as the project matures and we plan future work.
13 |
--------------------------------------------------------------------------------
/skpro/base/__init__.py:
--------------------------------------------------------------------------------
1 | """Base module with base classes BaseObject, BaseEstimator."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | __all__ = ["BaseEstimator", "BaseMetaEstimator", "BaseObject"]
5 |
6 | from skpro.base._base import BaseEstimator, BaseMetaEstimator, BaseObject
7 |
--------------------------------------------------------------------------------
/skpro/distributions/adapters/statsmodels/__init__.py:
--------------------------------------------------------------------------------
1 | """Adapters for probability distribution objects, statsmodels facing."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | from skpro.distributions.adapters.statsmodels._empirical import empirical_from_rvdf
5 |
6 | __all__ = ["empirical_from_rvdf"]
7 |
--------------------------------------------------------------------------------
/.github/workflows/cancel.yml:
--------------------------------------------------------------------------------
1 | name: Cancel
2 | on:
3 | workflow_run:
4 | workflows: ["Test"]
5 | types:
6 | - requested
7 | jobs:
8 | cancel:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: styfle/cancel-workflow-action@0.12.1
12 | with:
13 | workflow_id: ${{ github.event.workflow.id }}
14 |
--------------------------------------------------------------------------------
/skpro/regression/compose/__init__.py:
--------------------------------------------------------------------------------
1 | """Composition and pipelines for probabilistic supervised regression."""
2 |
3 | from skpro.regression.compose._pipeline import Pipeline
4 | from skpro.regression.compose._ttr import TransformedTargetRegressor
5 |
6 | __all__ = [
7 | "Pipeline",
8 | "TransformedTargetRegressor",
9 | ]
10 |
--------------------------------------------------------------------------------
/skpro/regression/base/adapters/__init__.py:
--------------------------------------------------------------------------------
1 | """Base classes for adapting probabilistic regressors to the skpro framework."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | __all__ = ["_DelegateWithFittedParamForwarding"]
5 |
6 | from skpro.regression.base.adapters._sklearn import _DelegateWithFittedParamForwarding
7 |
--------------------------------------------------------------------------------
/skpro/regression/ensemble/__init__.py:
--------------------------------------------------------------------------------
1 | """Ensemble probabilistic regressors, e.g., bagging and natural gradient boosting."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | from skpro.regression.ensemble._bagging import BaggingRegressor
5 | from skpro.regression.ensemble._ngboost import NGBoostRegressor
6 |
7 | __all__ = ["BaggingRegressor", "NGBoostRegressor"]
8 |
--------------------------------------------------------------------------------
/docs/source/about/team.rst:
--------------------------------------------------------------------------------
1 | .. _team:
2 |
3 | ================
4 | Development Team
5 | ================
6 |
7 | This package is currently maintained by the ``sktime`` community, see
8 | `sktime team <https://www.sktime.net/en/stable/about/team.html>`_.
9 |
10 | This project is currently to be considered part of ``sktime``,
11 | and not a separate entity.
12 |
--------------------------------------------------------------------------------
/skpro/regression/base/__init__.py:
--------------------------------------------------------------------------------
1 | """Base classes for probabilistic regression."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | __all__ = ["BaseProbaRegressor", "_DelegatedProbaRegressor"]
5 |
6 | from skpro.regression.base._base import BaseProbaRegressor
7 | from skpro.regression.base._delegate import _DelegatedProbaRegressor
8 |
--------------------------------------------------------------------------------
/AUTHORS.rst:
--------------------------------------------------------------------------------
1 | ==========
2 | Developers
3 | ==========
4 |
5 | **skpro** is developed by the sktime community.
6 |
7 | We follow the all-contributors specification for giving credit.
8 | Contributions of any kind are welcome!
9 |
10 | For a list of contributors, see the file
11 | `all-contributorsrc <https://github.com/sktime/skpro/blob/main/.all-contributorsrc>`_.
12 |
--------------------------------------------------------------------------------
/skpro/survival/coxph/__init__.py:
--------------------------------------------------------------------------------
1 | """Cox proportional hazards models."""
2 |
3 | from skpro.survival.coxph._coxnet_sksurv import CoxNet
4 | from skpro.survival.coxph._coxph_lifelines import CoxPHlifelines
5 | from skpro.survival.coxph._coxph_sksurv import CoxPHSkSurv
6 | from skpro.survival.coxph._coxph_statsmodels import CoxPH
7 |
8 | __all__ = ["CoxNet", "CoxPH", "CoxPHlifelines", "CoxPHSkSurv"]
9 |
--------------------------------------------------------------------------------
/skpro/metrics/survival/__init__.py:
--------------------------------------------------------------------------------
1 | """Metrics for time-to-event or survival prediction."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | __author__ = ["fkiraly"]
5 |
6 | __all__ = [
7 | "ConcordanceHarrell",
8 | "SPLL",
9 | ]
10 |
11 | from skpro.metrics.survival._c_harrell import ConcordanceHarrell
12 | from skpro.metrics.survival._spll import SPLL
13 |
--------------------------------------------------------------------------------
/skpro/datatypes/_table/__init__.py:
--------------------------------------------------------------------------------
1 | """Module exports: Table type checkers, converters and mtype inference."""
2 |
3 | from skpro.datatypes._table._convert import convert_dict as convert_dict_Table
4 | from skpro.datatypes._table._registry import MTYPE_LIST_TABLE, MTYPE_REGISTER_TABLE
5 |
6 | __all__ = [
7 | "convert_dict_Table",
8 | "MTYPE_LIST_TABLE",
9 | "MTYPE_REGISTER_TABLE",
10 | ]
11 |
--------------------------------------------------------------------------------
/skpro/survival/compose/__init__.py:
--------------------------------------------------------------------------------
1 | """Survival or time-to-event prediction estimators, composers."""
2 |
3 | from skpro.regression.compose import Pipeline
4 | from skpro.survival.compose._reduce_cond_unc import ConditionUncensored
5 | from skpro.survival.compose._reduce_uncensored import FitUncensored
6 |
7 | __all__ = [
8 | "Pipeline",
9 | "FitUncensored",
10 | "ConditionUncensored",
11 | ]
12 |
--------------------------------------------------------------------------------
/skpro/distributions/adapters/scipy/__init__.py:
--------------------------------------------------------------------------------
1 | """Adapters for probability distribution objects, scipy facing."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | from skpro.distributions.adapters.scipy._distribution import _ScipyAdapter
5 | from skpro.distributions.adapters.scipy._empirical import empirical_from_discrete
6 |
7 | __all__ = ["empirical_from_discrete", "_ScipyAdapter"]
8 |
--------------------------------------------------------------------------------
/skpro/survival/aft/__init__.py:
--------------------------------------------------------------------------------
1 | """Module containing accelerated failure time models."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | __all__ = ["AFTFisk", "AFTLogNormal", "AFTWeibull"]
5 |
6 | from skpro.survival.aft._aft_lifelines_fisk import AFTFisk
7 | from skpro.survival.aft._aft_lifelines_lognormal import AFTLogNormal
8 | from skpro.survival.aft._aft_lifelines_weibull import AFTWeibull
9 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 |
4 | version: 2
5 |
6 | python:
7 | install:
8 | - method: pip
9 | path: .
10 | extra_requirements:
11 | - docs
12 | build:
13 | os: ubuntu-22.04
14 | tools:
15 | python: "3.11"
16 |
17 | sphinx:
18 | configuration: docs/source/conf.py
19 | # fail_on_warning: True
20 |
--------------------------------------------------------------------------------
/skpro/regression/online/__init__.py:
--------------------------------------------------------------------------------
1 | """Meta-algorithms to build online regression models."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | from skpro.regression.online._dont_refit import OnlineDontRefit
5 | from skpro.regression.online._refit import OnlineRefit
6 | from skpro.regression.online._refit_every import OnlineRefitEveryN
7 |
8 | __all__ = ["OnlineDontRefit", "OnlineRefit", "OnlineRefitEveryN"]
9 |
--------------------------------------------------------------------------------
/skpro/regression/bayesian/__init__.py:
--------------------------------------------------------------------------------
1 | """Base classes for Bayesian probabilistic regression."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | __all__ = [
5 | "BayesianConjugateLinearRegressor",
6 | "BayesianLinearRegressor",
7 | ]
8 |
9 | from skpro.regression.bayesian._linear_conjugate import BayesianConjugateLinearRegressor
10 | from skpro.regression.bayesian._linear_mcmc import BayesianLinearRegressor
11 |
--------------------------------------------------------------------------------
/docs/source/about/mission.rst:
--------------------------------------------------------------------------------
1 | .. _mission:
2 |
3 | =======
4 | Mission
5 | =======
6 |
7 | The goal of the ``skpro`` project is to provide a unified package for
8 | using, building, and evaluating predictive probabilistic machine learning models,
9 | following `scikit-learn`_ and `sktime`_ design principles.
10 |
11 | The wider (non-technical) mission is identical with that of `sktime`_:
12 |
13 | .. _sktime: https://www.sktime.net/en/stable/index.html
14 | .. _scikit-learn: https://scikit-learn.org/stable/index.html
--------------------------------------------------------------------------------
/docs/source/api_reference/base.rst:
--------------------------------------------------------------------------------
1 | .. _base_ref:
2 |
3 | Base
4 | ====
5 |
6 | The :mod:`skpro.base` module contains abstract base classes.
7 |
8 | .. automodule:: skpro.base
9 | :no-members:
10 | :no-inherited-members:
11 |
12 | Base classes
13 | ------------
14 |
15 | .. currentmodule:: skpro.base
16 |
17 | .. autosummary::
18 | :toctree: auto_generated/
19 | :template: class.rst
20 |
21 | BaseObject
22 | BaseEstimator
23 | BaseMetaEstimator
24 |
--------------------------------------------------------------------------------
/docs/source/developer_guide/add_estimators.rst:
--------------------------------------------------------------------------------
1 | .. _developer_guide_add_estimators:
2 |
3 | =======================
4 | Implementing Estimators
5 | =======================
6 |
7 | ``skpro`` follows the same extension principles as ``sktime`` - we advise to read the ``sktime`` documentation on this topic:
8 | `here <https://www.sktime.net/en/stable/developer_guide/add_estimators.html>`__
9 |
10 | The same workflows apply, using ``skpro`` extension templates and ``check_estimator``.
11 |
--------------------------------------------------------------------------------
/skpro/distributions/base/__init__.py:
--------------------------------------------------------------------------------
1 | """Probability distribution objects."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 | # adapted from sktime
4 |
5 | __all__ = ["BaseDistribution", "_DelegatedDistribution", "_BaseArrayDistribution"]
6 |
7 | from skpro.distributions.base._base import BaseDistribution
8 | from skpro.distributions.base._base_array import _BaseArrayDistribution
9 | from skpro.distributions.base._delegate import _DelegatedDistribution
10 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/report-all-other-issues-or-questions.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Report all other issues or questions
3 | about: Let us know about anything else not covered by one of our specific issue types.
4 | title: ''
5 | labels: needs triage
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the issue**
11 |
15 |
--------------------------------------------------------------------------------
/docs/source/tutorials.rst:
--------------------------------------------------------------------------------
1 | .. _tutorials:
2 |
3 | Tutorials
4 | =========
5 |
6 | Below are introductory tutorials for ``skpro``.
7 |
8 | Each tutorial is located in its own repository, which contains notebooks and links to a youtube video walkthrough.
9 |
10 | .. note::
11 |
12 | There are no video tutorials yet! Stay tuned...
13 |
14 | For user guides specific to learning tasks, see our :ref:`user_guide` page.
15 | The user guide notebooks are always functional with the most recent stable version.
16 |
--------------------------------------------------------------------------------
/skpro/datatypes/_proba/__init__.py:
--------------------------------------------------------------------------------
1 | """Type checkers, converters and mtype inference for probabilistic return types."""
2 |
3 | from skpro.datatypes._proba._check import check_dict as check_dict_Proba
4 | from skpro.datatypes._proba._convert import convert_dict as convert_dict_Proba
5 | from skpro.datatypes._proba._registry import MTYPE_LIST_PROBA, MTYPE_REGISTER_PROBA
6 |
7 | __all__ = [
8 | "check_dict_Proba",
9 | "convert_dict_Proba",
10 | "MTYPE_LIST_PROBA",
11 | "MTYPE_REGISTER_PROBA",
12 | ]
13 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "pip"
4 | directory: "/"
5 | schedule:
6 | interval: "daily"
7 | commit-message:
8 | prefix: "[MNT] [Dependabot]"
9 | include: "scope"
10 | labels:
11 | - "maintenance"
12 | - package-ecosystem: "github-actions"
13 | directory: "/"
14 | schedule:
15 | interval: "daily"
16 | commit-message:
17 | prefix: "[MNT] [Dependabot]"
18 | include: "scope"
19 | labels:
20 | - "maintenance"
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/maintenance-issue.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Maintenance issue
3 | about: Suggest a maintenance update
4 | title: "[MNT]"
5 | labels: maintenance
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the maintenance issue**
11 |
12 |
17 |
--------------------------------------------------------------------------------
/skpro/regression/__init__.py:
--------------------------------------------------------------------------------
1 | """Probabilistic supervised regression estimators."""
2 |
3 | from skpro.regression.conformal import (
4 | MapieConformalizedQuantileRegressor,
5 | MapieCrossConformalRegressor,
6 | MapieSplitConformalRegressor,
7 | )
8 | from skpro.regression.jackknife import MapieJackknifeAfterBootstrapRegressor
9 |
10 | __all__ = [
11 | "MapieSplitConformalRegressor",
12 | "MapieCrossConformalRegressor",
13 | "MapieConformalizedQuantileRegressor",
14 | "MapieJackknifeAfterBootstrapRegressor",
15 | ]
16 |
--------------------------------------------------------------------------------
/skpro/regression/conformal/__init__.py:
--------------------------------------------------------------------------------
1 | """MAPIE Conformal Regressors."""
2 |
3 | from skpro.regression.conformal._mapie_cqr import MapieConformalizedQuantileRegressor
4 | from skpro.regression.conformal._mapie_cross_conformal import (
5 | MapieCrossConformalRegressor,
6 | )
7 | from skpro.regression.conformal._mapie_split_conformal import (
8 | MapieSplitConformalRegressor,
9 | )
10 |
11 | __all__ = [
12 | "MapieSplitConformalRegressor",
13 | "MapieCrossConformalRegressor",
14 | "MapieConformalizedQuantileRegressor",
15 | ]
16 |
--------------------------------------------------------------------------------
/docs/source/api_reference.rst:
--------------------------------------------------------------------------------
1 | .. _api_reference:
2 |
3 | =============
4 | API Reference
5 | =============
6 |
7 | Welcome to the API reference for ``skpro``.
8 |
9 | The API reference provides a technical manual.
10 | It describes the classes and functions included in ``skpro``.
11 |
12 | .. include:: includes/api_css.rst
13 |
14 | .. toctree::
15 | :maxdepth: 1
16 |
17 | api_reference/regression
18 | api_reference/survival
19 | api_reference/distributions
20 | api_reference/metrics
21 | api_reference/base
22 | api_reference/utils
23 |
--------------------------------------------------------------------------------
/skpro/registry/__init__.py:
--------------------------------------------------------------------------------
1 | """Registry and lookup functionality."""
2 |
3 | from skpro.registry._craft import craft, deps, imports
4 | from skpro.registry._lookup import all_objects, all_tags
5 | from skpro.registry._scitype import scitype
6 | from skpro.registry._tags import (
7 | OBJECT_TAG_LIST,
8 | OBJECT_TAG_REGISTER,
9 | check_tag_is_valid,
10 | )
11 |
12 | __all__ = [
13 | "OBJECT_TAG_LIST",
14 | "OBJECT_TAG_REGISTER",
15 | "all_objects",
16 | "all_tags",
17 | "check_tag_is_valid",
18 | "craft",
19 | "deps",
20 | "imports",
21 | "scitype",
22 | ]
23 |
--------------------------------------------------------------------------------
/skpro/survival/ensemble/__init__.py:
--------------------------------------------------------------------------------
1 | """Ensemble models for survival prediction."""
2 |
3 | from skpro.survival.ensemble._grad_boost_sksurv import (
4 | SurvGradBoostCompSkSurv,
5 | SurvGradBoostSkSurv,
6 | )
7 | from skpro.survival.ensemble._ngboost_surv import NGBoostSurvival
8 | from skpro.survival.ensemble._survforest_sksurv import (
9 | SurvivalForestSkSurv,
10 | SurvivalForestXtraSkSurv,
11 | )
12 |
13 | __all__ = [
14 | "SurvGradBoostSkSurv",
15 | "SurvGradBoostCompSkSurv",
16 | "SurvivalForestSkSurv",
17 | "SurvivalForestXtraSkSurv",
18 | "NGBoostSurvival",
19 | ]
20 |
--------------------------------------------------------------------------------
/skpro/tests/_config.py:
--------------------------------------------------------------------------------
1 | """Test configs."""
2 |
3 | # --------------------
4 | # configs for test run
5 | # --------------------
6 |
# whether to test only estimators from modules that are changed w.r.t. main
# default is False, can be set to True by pytest --only_changed_modules True flag
ONLY_CHANGED_MODULES = False


# list of str, names of estimators to exclude from testing
# WARNING: tests for these estimators will be skipped
EXCLUDE_ESTIMATORS = [
    "DummySkipped",
    "ClassName",  # exclude classes from extension templates
]


# tests to exclude from the test run, empty by default
# NOTE(review): presumably a dict keyed by estimator name, with the names of the
# tests to skip as values - confirm against the code that reads this config
EXCLUDED_TESTS = {}
21 |
--------------------------------------------------------------------------------
/skpro/datatypes/_proba/_registry.py:
--------------------------------------------------------------------------------
1 | """Registry of mtypes for Proba scitype.
2 |
3 | See datatypes._registry for API.
4 | """
5 |
6 | import pandas as pd
7 |
8 | __all__ = [
9 | "MTYPE_REGISTER_PROBA",
10 | "MTYPE_LIST_PROBA",
11 | ]
12 |
13 |
# register of mtypes for the Proba scitype
# every entry is a 3-tuple of strings; judging from the entries below, these are
# (mtype name, scitype name, plain English description)
MTYPE_REGISTER_PROBA = [
    ("pred_interval", "Proba", "predictive intervals"),
    ("pred_quantiles", "Proba", "quantile predictions"),
    ("pred_var", "Proba", "variance predictions"),
    # ("pred_dost", "Proba", "full distribution predictions, tensorflow-probability"),
]

# array holding the first tuple element (the mtype name) of every register entry
MTYPE_LIST_PROBA = pd.DataFrame(MTYPE_REGISTER_PROBA)[0].values
22 |
--------------------------------------------------------------------------------
/.github/.codecov.yml:
--------------------------------------------------------------------------------
1 | # paths to ignore
2 | ignore:
3 | - "docs/**/*"
4 | - "build_tools/**/*"
5 | - "examples/*"
6 | - ".github/*"
7 | - ".binder/*"
8 | - "extension_templates/*"
9 | - "*.md"
10 | - "*.yml"
11 | - "*.yaml"
12 |
13 | # PR status check
14 | coverage:
15 | status:
16 | project:
17 | default:
18 | # threshold: 1%
19 | informational: true
20 | patch:
21 | default:
22 | informational: true
23 |
24 | # post coverage report as comment on PR
25 | comment: false
26 |
27 | # enable codecov to report to GitHub status checks
28 | github_checks:
29 | annotations: false
30 |
--------------------------------------------------------------------------------
/skpro/utils/index.py:
--------------------------------------------------------------------------------
1 | """Utility functions for working with indices."""
2 |
3 | import numpy as np
4 |
5 |
def random_ss_ix(ix, size, replace=True):
    """Randomly uniformly sample indices from a list of indices.

    Positions are drawn with ``np.random.choice``, i.e., using numpy's global
    random state, and are then used to subset ``ix``.

    Parameters
    ----------
    ix : pd.Index or subsettable iterable via getitem
        list of indices to sample from
    size : int
        number of indices to sample
    replace : bool, default=True
        whether to sample with replacement

    Returns
    -------
    ixs : result of subsetting ``ix`` with the sampled positions
        if ``ix`` accepts an integer array in ``__getitem__`` (e.g., ``pd.Index``),
        this is ``ix[positions]``; for plain sequences such as ``list`` or
        ``tuple``, a ``list`` of the sampled elements, in the order drawn
    """
    a = range(len(ix))
    positions = np.random.choice(a, size=size, replace=replace)

    try:
        # array-like containers can be subset with an integer array directly
        return ix[positions]
    except TypeError:
        # plain sequences (list, tuple) only accept scalar positions,
        # so the elements are collected one by one
        return [ix[i] for i in positions]
21 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | # .coveragerc to control coverage.py
2 | [run]
3 | branch = True
4 | source = skpro
5 | # omit = bad_file.py
6 |
7 | [report]
8 | # Regexes for lines to exclude from consideration
9 | exclude_lines =
10 | # Have to re-enable the standard pragma
11 | pragma: no cover
12 |
13 | # Don't complain about missing debug-only code:
14 | def __repr__
15 | if self\.debug
16 |
17 | # Don't complain if tests don't hit defensive assertion code:
18 | raise AssertionError
19 | raise NotImplementedError
20 |
21 | # Don't complain if non-runnable code isn't run:
22 | if 0:
23 | if __name__ == .__main__.:
24 |
--------------------------------------------------------------------------------
/CITATION.rst:
--------------------------------------------------------------------------------
1 | Gressmann, F., Király, F. J., Mateen, B., & Oberhauser, H. (2018). Probabilistic supervised learning. ArXiv:1801.00753 [Cs, Math, Stat]. Retrieved from http://arxiv.org/abs/1801.00753 ::
2 |
3 | @article{skpro,
4 | archivePrefix = {arXiv},
5 | eprinttype = {arxiv},
6 | eprint = {1801.00753},
7 | primaryClass = {cs, math, stat},
8 | title = {Probabilistic Supervised Learning},
9 | url = {http://arxiv.org/abs/1801.00753},
10 | urldate = {2018-01-03},
11 | date = {2018-01-02},
12 | author = {Gressmann, Frithjof and Kir{\'a}ly, Franz J. and Mateen, Bilal and Oberhauser, Harald}
13 | }
14 |
--------------------------------------------------------------------------------
/docs/source/about/governance.rst:
--------------------------------------------------------------------------------
1 | .. _governance:
2 |
3 | ==========
4 | Governance
5 | ==========
6 |
7 | .. topic:: This project is part of the ``sktime`` project.
8 |
9 | The ``skpro`` repository and community is currently to be considered part of
10 | ``sktime``, and not a separate entity. It is maintained by the ``sktime`` team.
11 | It is THEREFORE subject to rules and provisions of ``sktime``,
12 | see `sktime governance <https://www.sktime.net/en/latest/get_involved/governance.html>`_.
13 | The below are draft documents for a potentially later stage, copied from ``sktime``.
14 | In case of discrepancy, ``sktime`` documents apply.
15 |
--------------------------------------------------------------------------------
/skpro/regression/linear/__init__.py:
--------------------------------------------------------------------------------
1 | """Linear regression models."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | from skpro.regression.dummy import DummyProbaRegressor
5 | from skpro.regression.linear._glm import GLMRegressor
6 | from skpro.regression.linear._glum import GlumRegressor
7 | from skpro.regression.linear._sklearn import ARDRegression, BayesianRidge
8 | from skpro.regression.linear._sklearn_poisson import PoissonRegressor
9 |
10 | __all__ = [
11 | "ARDRegression",
12 | "BayesianRidge",
13 | "GLMRegressor",
14 | "GlumRegressor",
15 | "PoissonRegressor",
16 | "DummyProbaRegressor",
17 | ]
18 |
--------------------------------------------------------------------------------
/skpro/utils/numpy.py:
--------------------------------------------------------------------------------
1 | """Utility functions for numpy/sklearn related matters."""
2 |
3 | __authors__ = ["fkiraly"]
4 |
5 |
def flatten_to_1D_if_colvector(y):
    """Flatten a 2D column vector to a 1D array, pass anything else through.

    Parameters
    ----------
    y : numpy array, 1D or 2D
        Array to flatten

    Returns
    -------
    y_flat : numpy array
        flattened 1D copy of ``y`` if ``y`` is 2D with exactly one column;
        otherwise ``y`` itself, unchanged
    """
    shape = y.shape
    is_column_vector = len(shape) == 2 and shape[1] == 1

    return y.flatten() if is_column_vector else y
26 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation-issue.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Documentation issue
3 | about: Suggest how we can improve our documentation
4 | title: "[DOC]"
5 | labels: documentation
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the documentation issue**
11 |
12 |
15 |
16 | **Suggest a concrete fix/improvement**
17 |
18 |
21 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # The file specifies framework level core developers for automated review requests
2 | #
3 | # Note: historically, this file has been used to maintain a list of
4 | # algorithm maintainers as specified in GOVERNANCE.md.
5 | # This is no longer the case, algorithm maintainers are now
6 | # specified directly in the estimator,
7 | # in the "maintainers" tag of the respective scikit-base object.
8 | #
9 | # Algorithm maintainers are programmatically queryable
10 | # via Estimator.get_class_tag("maintainers").
11 | # Further lookup such as "which algorithms does M maintain"
12 | # can be carried out using registry.all_objects
13 |
14 | * @benheid @felipeangelimvieira @fkiraly @fnhirwa @geetu040 @pranavvp16 @sairevanth25 @XinyuWuu
15 |
--------------------------------------------------------------------------------
/docs/source/get_started.rst:
--------------------------------------------------------------------------------
1 | .. _getting_started:
2 |
3 | ===========
4 | Get Started
5 | ===========
6 |
7 | The following information is designed to get users up and running with
8 | ``skpro`` quickly. For more detailed information, see the links in each
9 | of the subsections.
10 |
11 | Installation
12 | ============
13 |
14 | ``skpro`` currently supports:
15 |
16 | * environments with python version 3.8, 3.9, 3.10, 3.11, or 3.12.
17 | * operating systems Mac OS X, Unix-like OS, Windows 8.1 and higher
18 | * installation via ``PyPI`` or ``conda``
19 |
20 | Please see the :ref:`installation <installation>` guide for step-by-step instructions on the package installation.
21 |
22 | .. _scikit-learn: https://scikit-learn.org/stable/index.html
23 |
--------------------------------------------------------------------------------
/skpro/utils/pandas.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3 -u
2 | """Utilities for pandas adaptation."""
3 |
4 | __author__ = ["fkiraly"]
5 |
6 |
def df_map(x):
    """Return the elementwise mapping method of a DataFrame, map or applymap.

    In pandas 2.1.0, applymap was deprecated in favor of the newly introduced map.
    For compatibility with older versions, map is used if the object has it,
    and applymap otherwise.

    Parameters
    ----------
    x : assumed pd.DataFrame

    Returns
    -------
    x.map, if available, otherwise x.applymap
        Note: the bound method is returned, it is not called
    """
    method_name = "map" if hasattr(x, "map") else "applymap"
    return getattr(x, method_name)
27 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Temporary and binary files
2 | *~
3 | *.py[cod]
4 | *.so
5 | *.cfg
6 | !setup.cfg
7 | *.orig
8 | *.log
9 | *.pot
10 | __pycache__/*
11 | .cache/*
12 | .*.swp
13 | */.ipynb_checkpoints/*
14 |
15 | # Project files
16 | .ropeproject
17 | .project
18 | .pydevproject
19 | .settings
20 | .idea
21 | .vscode
22 |
23 | # Package files
24 | *.egg
25 | *.eggs/
26 | .installed.cfg
27 | *.egg-info
28 |
29 | # Unittest and coverage
30 | htmlcov/*
31 | .coverage
32 | .tox
33 | junit.xml
34 | coverage.xml
35 | tests/.hypothesis/
36 | .hypothesis/
37 |
38 | # Build and docs folder/files
39 | build/*
40 | dist/*
41 | sdist/*
42 | docs/api/*
43 | docs/_build/*
44 | cover/*
45 | MANIFEST
46 |
47 | # Virtual environments
48 | .venv/
49 | venv/
50 | env/
51 | ENV/
52 |
--------------------------------------------------------------------------------
/.binder/Dockerfile:
--------------------------------------------------------------------------------
1 | # This Dockerfile is used to build skpro when launching binder.
2 | # Find out more at: https://mybinder.readthedocs.io/en/latest/index.html
3 |
4 | FROM jupyter/scipy-notebook:python-3.11.6
5 | # Set up user to avoid running as root
6 | ARG NB_USER
7 | ARG NB_UID
8 | ENV USER ${NB_USER}
9 | ENV HOME /home/${NB_USER}
10 |
11 | # Binder will automatically clone the repo, but we need to make sure the
12 | # contents of our repo are in the ${HOME} directory
13 | COPY . ${HOME}
14 | USER root
15 | RUN chown -R ${NB_UID} ${HOME}
16 |
17 | # Switch user and directory
18 | USER ${USER}
19 | WORKDIR ${HOME}
20 |
21 | # Install extra requirements and skpro based on main branch
22 | RUN pip install --upgrade pip --no-cache-dir && pip install .[binder]
23 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: "[ENH]"
5 | labels: feature request
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 |
14 |
15 | **Describe the solution you'd like**
16 |
19 |
20 | **Describe alternatives you've considered**
21 |
24 |
25 |
26 | **Additional context**
27 |
30 |
--------------------------------------------------------------------------------
/skpro/tests/utils.py:
--------------------------------------------------------------------------------
1 | """Legacy module: test utils."""
2 | # LEGACY MODULE - TODO: remove or refactor
3 |
4 | import numpy as np
5 |
6 |
def assert_close_prediction(y_hat, y_true, fraction=0.75, within=0.25):
    """Check that a defined fraction of predictions lies within a tolerance.

    Parameters
    ----------
    y_hat : array-like
        Predictions
    y_true : array-like
        True values
    fraction : float, default=0.75
        Minimum fraction of predictions that have to be close to the true values
    within : float, default=0.25
        Relative tolerance to assume when comparing the values,
        passed as ``rtol`` to ``np.isclose``

    Raises
    ------
    AssertionError
        if fewer than ``fraction * len(y_true)`` predictions are within tolerance
    """
    n_close = np.count_nonzero(np.isclose(y_hat, y_true, rtol=within))
    n_required = len(y_true) * fraction

    # "at least", not "more than": with a strict comparison, fraction=1.0
    # could never be satisfied, even if all predictions are close.
    # An explicit raise is used so the check also runs under "python -O".
    if n_close < n_required:
        raise AssertionError(
            f"only {n_close} of {len(y_true)} predictions are within relative "
            f"tolerance {within} of the true values, "
            f"at least {n_required} are required"
        )
27 |
--------------------------------------------------------------------------------
/docs/source/includes/api_css.rst:
--------------------------------------------------------------------------------
1 | ..
2 | File to ..include in the API ref document.
3 |
4 | .. raw:: html
5 |
6 |
30 |
--------------------------------------------------------------------------------
/skpro/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | """Metrics for probabilistic supervised regression."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 | # adapted from sktime
4 |
5 | __author__ = ["fkiraly", "euanenticott-shell"]
6 |
7 | __all__ = [
8 | "CRPS",
9 | "AUCalibration",
10 | "ConstraintViolation",
11 | "EmpiricalCoverage",
12 | "IntervalWidth",
13 | "LogLoss",
14 | "LinearizedLogLoss",
15 | "PinballLoss",
16 | "SquaredDistrLoss",
17 | # survival metrics
18 | "ConcordanceHarrell",
19 | "SPLL",
20 | ]
21 |
22 | from skpro.metrics._classes import (
23 | CRPS,
24 | AUCalibration,
25 | ConstraintViolation,
26 | EmpiricalCoverage,
27 | IntervalWidth,
28 | LinearizedLogLoss,
29 | LogLoss,
30 | PinballLoss,
31 | SquaredDistrLoss,
32 | )
33 | from skpro.metrics.survival import SPLL, ConcordanceHarrell
34 |
--------------------------------------------------------------------------------
/skpro/utils/sklearn.py:
--------------------------------------------------------------------------------
1 | """Utility functions for adapting to sklearn."""
2 |
3 | import numpy as np
4 |
5 |
def prep_skl_df(df, copy_df=False):
    """Make df compatible with sklearn input expectations.

    Changes:
    turns column index into a list of strings

    Parameters
    ----------
    df : pd.DataFrame
        data frame whose column labels are to be coerced to strings
    copy_df : bool, default=False
        whether to mutate df or return a copy
        if False, the column index of df is mutated in place
        if True, original df is not mutated. If column index is not a list of
        strings, a copy is made and the copy is mutated.
        Otherwise, the original df is returned.

    Returns
    -------
    df : pd.DataFrame
        data frame with column labels converted to strings.
        This is the object passed in if all column labels already equal their
        string form, or if ``copy_df`` is False; otherwise a copy.
    """
    cols = df.columns
    str_cols = cols.astype(str)

    # nothing to do if all labels already equal their string form
    if np.all(str_cols == cols):
        return df

    if copy_df:
        df = df.copy()
    df.columns = str_cols

    return df
31 |
--------------------------------------------------------------------------------
/skpro/datatypes/_table/_registry.py:
--------------------------------------------------------------------------------
1 | """Registry of mtypes for Table scitype.
2 |
3 | See datatypes._registry for API.
4 | """
5 |
6 | import pandas as pd
7 |
8 | __all__ = [
9 | "MTYPE_REGISTER_TABLE",
10 | "MTYPE_LIST_TABLE",
11 | ]
12 |
13 |
# register of mtypes for the Table scitype
# every entry is a 3-tuple of strings; judging from the entries below, these are
# (mtype name, scitype name, plain English description)
MTYPE_REGISTER_TABLE = [
    ("pd_DataFrame_Table", "Table", "pd.DataFrame representation of a data table"),
    ("numpy1D", "Table", "1D np.ndarray representation of a univariate table"),
    ("numpy2D", "Table", "2D np.ndarray representation of a univariate table"),
    ("pd_Series_Table", "Table", "pd.Series representation of a data table"),
    ("list_of_dict", "Table", "list of dictionaries with primitive entries"),
    ("polars_eager_table", "Table", "polars.DataFrame representation of a data table"),
    ("polars_lazy_table", "Table", "polars.LazyFrame representation of a data table"),
]

# array holding the first tuple element (the mtype name) of every register entry
MTYPE_LIST_TABLE = pd.DataFrame(MTYPE_REGISTER_TABLE)[0].values
25 |
--------------------------------------------------------------------------------
/docs/source/user_guide.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _user_guide:
3 |
4 | ==========
5 | User Guide
6 | ==========
7 |
8 | Welcome to skpro's user guide!
9 |
10 | The user guide consists of introductory notebooks, ordered by learning task and object type.
11 |
12 | For guided tutorials with videos, see our :ref:`tutorials` page.
13 |
14 | To run the user guide notebooks interactively, you can
15 | `launch them on binder <https://mybinder.org/v2/gh/sktime/skpro/main?filepath=examples>`_
16 | without having to install anything.
17 |
18 | We assume basic familiarity with `scikit-learn`_. If you haven't worked with scikit-learn before, check out their
19 | `getting-started guide`_.
20 |
21 | The notebook files can be found `here <https://github.com/sktime/skpro/tree/main/examples>`_.
22 |
23 | .. _scikit-learn: https://scikit-learn.org/stable/
24 | .. _getting-started guide: https://scikit-learn.org/stable/getting_started.html
25 |
26 | .. nbgallery::
27 | :glob:
28 |
29 | examples/*
30 |
--------------------------------------------------------------------------------
/skpro/distributions/tests/test_hurdle.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | from skpro.distributions.hurdle import Hurdle
5 | from skpro.tests.test_switch import run_test_module_changed
6 |
7 |
@pytest.mark.skipif(
    not run_test_module_changed("skpro.distributions"),
    reason="run only if skpro.distributions has been changed",
)
@pytest.mark.parametrize("params", Hurdle.get_test_params())
def test_hurdle_less_than_zero(params):
    """Test that Hurdle methods return the expected values at a negative value.

    Evaluates cdf, pdf, pmf, log_pdf, and log_pmf at -1.0 and checks that
    all returned entries are 0 for cdf, pdf, pmf, and -inf for the log variants.
    """
    distribution = Hurdle(**params)

    # negative evaluation point
    v = -1.0

    # pairs of method to call and value expected in every entry of its return
    funcs_and_expected = [
        (distribution.cdf, 0.0),
        (distribution.pdf, 0.0),
        (distribution.pmf, 0.0),
        (distribution.log_pdf, -np.inf),
        (distribution.log_pmf, -np.inf),
    ]

    for func, expected in funcs_and_expected:
        values = func(v)
        # coerce to array so the check covers scalar and tabular returns alike
        assert (np.asarray(values) == expected).all()
30 |
--------------------------------------------------------------------------------
/skpro/distributions/tests/test_empirical.py:
--------------------------------------------------------------------------------
1 | """Tests for Empirical distributions."""
2 |
3 | import pandas as pd
4 | import pytest
5 |
6 | from skpro.distributions.empirical import Empirical
7 | from skpro.tests.test_switch import run_test_module_changed
8 |
9 |
@pytest.mark.skipif(
    not run_test_module_changed("skpro.distributions"),
    reason="run only if skpro.distributions has been changed",
)
def test_empirical_iat_index():
    """Check shape and sample index of an Empirical after subsetting via iat."""
    # samples indexed by a two-level (sample, time) MultiIndex
    sample_index = pd.MultiIndex.from_product(
        [[0, 1], [0, 1, 2]], names=["sample", "time"]
    )
    sample_values = [[0, 1], [2, 3], [10, 11], [6, 7], [8, 9], [4, 5]]
    samples = pd.DataFrame(sample_values, index=sample_index, columns=["a", "b"])
    dist = Empirical(samples, columns=["a", "b"])

    # single-entry subset is expected to be a scalar distribution
    dist_at_00 = dist.iat[0, 0]
    assert dist_at_00.shape == ()

    # its samples should carry a flat index with values 0, 1
    assert not isinstance(dist_at_00.spl.index, pd.MultiIndex)
    assert (dist_at_00.spl.index == [0, 1]).all()
29 |
--------------------------------------------------------------------------------
/skpro/regression/tests/test_glum.py:
--------------------------------------------------------------------------------
1 | """Tests Glum regressor."""
2 |
3 | import pytest
4 |
5 | from skpro.regression.linear import GlumRegressor
6 | from skpro.tests.test_switch import run_test_for_class
7 |
8 |
@pytest.mark.skipif(
    not run_test_for_class(GlumRegressor),
    reason="run test only if softdeps are present and incrementally (if requested)",
)
def test_glum_simple_use():
    """Fit a GlumRegressor on diabetes data and check lengths of predictions."""
    from sklearn.datasets import load_diabetes
    from sklearn.model_selection import train_test_split

    # use only the first 200 rows of the data set
    n_rows = 200
    X_full, y_full = load_diabetes(return_X_y=True, as_frame=True)
    X = X_full.iloc[:n_rows]
    y = y_full.iloc[:n_rows]
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    regressor = GlumRegressor(family="normal")
    regressor.fit(X_train, y_train)
    point_pred = regressor.predict(X_test)
    proba_pred = regressor.predict_proba(X_test)

    # both kinds of prediction should have one entry per test row
    n_test = len(y_test)
    assert len(point_pred) == n_test
    assert len(proba_pred) == n_test
--------------------------------------------------------------------------------
/skpro/registry/tests/test_tags.py:
--------------------------------------------------------------------------------
1 | """Tests for tag register and tag functionality."""
2 |
3 | from skpro.registry._tags import OBJECT_TAG_REGISTER
4 |
5 |
def test_tag_register_type():
    """Check that the tag register is a list of well-formed 4-tuples.

    See _tags for specs. Per entry, the checks are:
    element 0 is a str; element 1 is a str or a list of str;
    element 2 is a str, or a pair of a str and a str or list of str;
    element 3 is a str.
    """
    assert isinstance(OBJECT_TAG_REGISTER, list)
    assert all(isinstance(entry, tuple) for entry in OBJECT_TAG_REGISTER)

    for entry in OBJECT_TAG_REGISTER:
        assert len(entry) == 4
        first, second, third, fourth = entry

        assert isinstance(first, str)

        assert isinstance(second, (str, list))
        if isinstance(second, list):
            assert all(isinstance(item, str) for item in second)

        assert isinstance(third, (str, tuple))
        if isinstance(third, tuple):
            assert len(third) == 2
            third_head, third_tail = third
            assert isinstance(third_head, str)
            assert isinstance(third_tail, (list, str))
            if isinstance(third_tail, list):
                assert all(isinstance(item, str) for item in third_tail)

        assert isinstance(fourth, str)
25 |
--------------------------------------------------------------------------------
/docs/source/contribute/code_of_conduct.rst:
--------------------------------------------------------------------------------
1 | .. _coc:
2 |
3 | ===============
4 | Code of conduct
5 | ===============
6 |
7 | The ``skpro`` project believes that everyone should be able to participate
8 | in our community without fear of harassment or discrimination. All contributors
9 | are expected to show respect and courtesy to other members of the community
10 | at all times.
11 |
12 | As an offshoot of the sktime project, we ask all members of the community to conform
13 | to the sktime project's
14 | `Code of Conduct <https://www.sktime.net/en/stable/get_involved/code_of_conduct.html>`_.
15 |
16 | If you need to report a Code of Conduct incident, reach out
17 | to Dr. Franz Király by email at franz.kiraly@sktime.net.
18 |
19 | .. note::
20 |
21 | ``skpro`` is currently maintained by the ``sktime`` community, and subject to its
22 | Code of Conduct processes (including how to report incidents).
23 | This may change as the project matures.
24 | However, ``skpro``'s Code of Conduct will remain
25 | dedicated to promoting a community without harassment and discrimination.
26 |
--------------------------------------------------------------------------------
/skpro/base/_base.py:
--------------------------------------------------------------------------------
1 | """Base class and template for regressors and transformers."""
2 | from skbase.base import BaseEstimator as _BaseEstimator
3 | from skbase.base import BaseMetaEstimator as _BaseMetaEstimator
4 | from skbase.base import BaseObject as _BaseObject
5 |
6 |
7 | class _CommonTags:
8 | """Mixin for common tag definitions to all estimator base classes."""
9 |
10 | # config common to all estimators
11 | _config = {}
12 |
13 | _tags = {
14 | "estimator_type": "estimator",
15 | "authors": "skpro developers",
16 | "maintainers": "skpro developers",
17 | }
18 |
19 | @property
20 | def name(self):
21 | """Return the name of the object or estimator."""
22 | return self.__class__.__name__
23 |
24 |
class BaseObject(_CommonTags, _BaseObject):
    """Base class for skpro objects.

    Combines the ``skbase`` ``BaseObject`` with the common skpro tags
    and the ``name`` property from ``_CommonTags``.
    """

    def __init__(self):
        super().__init__()
30 |
31 |
class BaseEstimator(_CommonTags, _BaseEstimator):
    """Base class for fittable skpro objects.

    Combines the ``skbase`` ``BaseEstimator`` with the common skpro tags
    and the ``name`` property from ``_CommonTags``.
    """
34 |
35 |
class BaseMetaEstimator(_CommonTags, _BaseMetaEstimator):
    """Base class for fittable composite meta-objects.

    Combines the ``skbase`` ``BaseMetaEstimator`` with the common skpro tags
    and the ``name`` property from ``_CommonTags``.
    """
38 |
--------------------------------------------------------------------------------
/.github/workflows/dependency-review.yml:
--------------------------------------------------------------------------------
1 | # Dependency Review Action
2 | #
3 | # This Action will scan dependency manifest files that change as part of a Pull Request, surfacing known-vulnerable versions of the packages declared or updated in the PR. Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable packages will be blocked from merging.
4 | #
5 | # Source repository: https://github.com/actions/dependency-review-action
6 | # Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement
7 | name: 'Dependency Review'
8 | on: [pull_request]
9 |
10 | permissions:
11 | contents: read
12 |
13 | jobs:
14 | dependency-review:
15 | runs-on: ubuntu-latest
16 | steps:
17 | - name: 'Checkout Repository'
18 | uses: actions/checkout@v6
19 | - name: 'Dependency Review'
20 | uses: actions/dependency-review-action@v4
21 | with:
22 | # Possible values: "critical", "high", "moderate", "low"
23 | # Will fail if a PR introduces a security vulnerability
24 | # that is the specified level or higher
25 | fail-on-severity: high
26 |
--------------------------------------------------------------------------------
/skpro/datatypes/_convert_utils/_coerce.py:
--------------------------------------------------------------------------------
1 | """Coercion utilities for mtypes."""
2 |
3 | __author__ = ["fkiraly"]
4 |
5 | import pandas as pd
6 |
7 |
8 | def _is_nullable_numeric(dtype):
9 | return dtype in ["Int64", "Float64", "boolean"]
10 |
11 |
def _coerce_df_dtypes(obj):
    """Cast nullable numeric pandas dtypes to plain ``float``.

    The input is never mutated. If a cast is needed, the result of
    ``astype`` is returned; otherwise ``obj`` itself is returned.

    Parameters
    ----------
    obj : pandas Series or DataFrame, or any object

    Returns
    -------
    obj unchanged, if obj is not a pandas Series or DataFrame,
        or if it has no nullable numeric dtype
    if obj is a pandas Series with nullable numeric dtype,
        the Series cast to float
    if obj is a pandas DataFrame,
        the DataFrame with all nullable numeric columns cast to float
    """
    if isinstance(obj, pd.DataFrame):
        # map only the columns that need casting, leave all others untouched
        casts = {
            col: "float"
            for col in obj.columns
            if _is_nullable_numeric(obj.dtypes[col])
        }
        return obj.astype(casts) if casts else obj

    if isinstance(obj, pd.Series) and _is_nullable_numeric(obj.dtype):
        return obj.astype("float")

    return obj
41 |
--------------------------------------------------------------------------------
/skpro/datatypes/__init__.py:
--------------------------------------------------------------------------------
1 | """Module exports: data type definitions, checks, validation, fixtures, converters."""
2 | # this module has been adapted from sktime
3 | # it is largely copy-pasting the Proba and Table parts
4 | # todo: factor this out into a common base
5 |
6 | __author__ = ["fkiraly"]
7 |
8 | from skpro.datatypes._check import (
9 | check_is_error_msg,
10 | check_is_mtype,
11 | check_is_scitype,
12 | check_raise,
13 | mtype,
14 | scitype,
15 | )
16 | from skpro.datatypes._convert import convert, convert_to
17 | from skpro.datatypes._examples import get_examples
18 | from skpro.datatypes._registry import (
19 | MTYPE_LIST_PROBA,
20 | MTYPE_LIST_TABLE,
21 | MTYPE_REGISTER,
22 | SCITYPE_LIST,
23 | SCITYPE_REGISTER,
24 | mtype_to_scitype,
25 | scitype_to_mtype,
26 | )
27 |
28 | __all__ = [
29 | "check_is_error_msg",
30 | "check_is_mtype",
31 | "check_is_scitype",
32 | "check_raise",
33 | "convert",
34 | "convert_to",
35 | "mtype",
36 | "get_examples",
37 | "mtype_to_scitype",
38 | "MTYPE_REGISTER",
39 | "MTYPE_LIST_PROBA",
40 | "MTYPE_LIST_TABLE",
41 | "scitype",
42 | "scitype_to_mtype",
43 | "SCITYPE_LIST",
44 | "SCITYPE_REGISTER",
45 | ]
46 |
--------------------------------------------------------------------------------
/skpro/metrics/_coerce.py:
--------------------------------------------------------------------------------
1 | """Output coercion utilities for metric classes."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 | # adapted from sktime
4 |
5 | import pandas as pd
6 |
7 |
8 | def _coerce_to_scalar(obj):
9 | """Coerce obj to scalar, from polymorphic input scalar or pandas."""
10 | if isinstance(obj, pd.DataFrame):
11 | assert len(obj) == 1
12 | assert len(obj.columns) == 1
13 | return obj.iloc[0, 0]
14 | if isinstance(obj, pd.Series):
15 | assert len(obj) == 1
16 | return obj.iloc[0]
17 | return obj
18 |
19 |
20 | def _coerce_to_df(obj):
21 | """Coerce to pd.DataFrame, from polymorphic input scalar or pandas."""
22 | return pd.DataFrame(obj)
23 |
24 |
25 | def _coerce_to_series(obj):
26 | """Coerce to pd.Series, from polymorphic input scalar or pandas."""
27 | if isinstance(obj, pd.DataFrame):
28 | assert len(obj.columns) == 1
29 | return obj.iloc[:, 0]
30 | elif isinstance(obj, pd.Series):
31 | return obj
32 | else:
33 | return pd.Series(obj)
34 |
35 |
36 | def _coerce_to_1d_numpy(obj):
37 | """Coerce to 1D np.ndarray, from pd.DataFrame or pd.Series."""
38 | if isinstance(obj, (pd.DataFrame, pd.Series)):
39 | obj = obj.values
40 | return obj.flatten()
41 |
--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
1 | """Main configuration file for pytest.
2 |
3 | Contents:
4 | adds an --only_changed_modules option to pytest
5 | this allows to turn on/off differential testing (for shorter runtime)
6 | "on" condition ensures that only estimators are tested that have changed,
7 | more precisely, only estimators whose class is in a module
8 | that has changed compared to the main branch
9 | by default, this is off, including for default local runs of pytest
10 | """
11 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
12 |
13 | __author__ = ["fkiraly"]
14 |
15 | import os
16 |
17 | from skbase.utils.dependencies import _check_soft_dependencies
18 |
# Switch matplotlib to the non-interactive "Agg" backend on GitHub Actions,
# used to prevent tkinter related errors in CI.
# Only applies if matplotlib is installed; local runs keep their backend.
if (
    _check_soft_dependencies("matplotlib", severity="none")
    and os.environ.get("GITHUB_ACTIONS") == "true"
):
    import matplotlib

    matplotlib.use("Agg")
25 |
26 |
def pytest_addoption(parser):
    """Register the ``--only_changed_modules`` command line option.

    The option defaults to ``False``.
    """
    option_name = "--only_changed_modules"
    help_text = (
        "test only estimators from modules that have changed compared to main"
    )
    parser.addoption(option_name, default=False, help=help_text)
34 |
35 |
def pytest_configure(config):
    """Switch on differential testing if requested on the command line.

    Sets ``skpro.tests._config.ONLY_CHANGED_MODULES`` to ``True`` if the
    ``--only_changed_modules`` option is ``True`` or the string ``"True"``.
    Otherwise, the config module is left untouched.
    """
    from skpro.tests import _config

    requested = config.getoption("--only_changed_modules")
    # the option may arrive as a bool or as the string given on the command line
    if requested in (True, "True"):
        _config.ONLY_CHANGED_MODULES = True
42 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | test = pytest
3 |
4 | [tool.isort]
5 | profile = "black"
6 | multi_line_output = 3
7 |
8 | [tool:pytest]
9 | # ignore certain folders and pytest warnings
10 | addopts =
11 | --ignore build_tools
12 | --ignore examples
13 | --ignore docs
14 | --durations 10
15 | --timeout 600
16 | --cov skpro
17 | --cov-report xml
18 | --cov-report html
19 | --showlocals
20 | --only_changed_modules True
21 | -n auto
22 | filterwarnings =
23 | ignore::UserWarning
24 | ignore:numpy.dtype size changed
25 | ignore:numpy.ufunc size changed
26 |
27 | [flake8]
28 | # Default flake8 3.5 ignored flags
29 | ignore = E121, E123, E126, E226, E24, E704, W503, W504
30 | # inline with Black code formatter
31 | max-line-length = 88
32 | exclude =
33 | skpro/_contrib/*
34 | extend-ignore =
35 | # See https://github.com/PyCQA/pycodestyle/issues/373
36 | E203
37 |
38 | [metadata]
39 | description_file = README.md
40 | long_description_content_type = text/markdown
41 |
42 | [check-manifest]
43 | ignore =
44 | .binder/**
45 | .all-contributorsrc
46 | .coveragerc
47 | examples/**
48 | build_tools/**
49 | __check_build/**
50 | docs/**
51 | Makefile
52 | CODEOWNERS
53 | CONTRIBUTING.md
54 | *.yaml
55 | *.yml
56 |
57 | [isort]
58 | profile = black
59 |
60 | [pydocstyle]
61 | convention = numpy
62 | match = (?!test_).*\.py
63 |
64 | [bdist_wheel]
65 | universal = false
66 |
67 | [sdist]
68 | formats = gztar
69 |
--------------------------------------------------------------------------------
/skpro/utils/random_state.py:
--------------------------------------------------------------------------------
1 | """Utilities for handling the random_state variable."""
2 | # copied from scikit-learn to avoid dependency on sklearn private methods
3 |
4 | import numpy as np
5 | from sklearn.utils import check_random_state
6 |
7 |
def set_random_state(estimator, random_state=0):
    """Seed all ``random_state`` parameters of an estimator.

    Every parameter named ``random_state``, or whose name ends in
    ``__random_state``, is set to an integer drawn from ``random_state``.
    Parameters are visited in sorted name order, one draw per parameter.

    Parameters
    ----------
    estimator : estimator supporting get_params, set_params
        Estimator with potential randomness managed by random_state parameters.

    random_state : int, RandomState instance or None, default=0
        Pseudo-random number generator to control the generation of the random
        integers. Pass an int for reproducible output across multiple function calls.

    Notes
    -----
    This does not necessarily set *all* ``random_state`` attributes that
    control an estimator's randomness, only those accessible through
    ``estimator.get_params()``.
    """
    rng = check_random_state(random_state)
    upper_bound = np.iinfo(np.int32).max

    seeds = {
        name: rng.randint(upper_bound)
        for name in sorted(estimator.get_params(deep=True))
        if name == "random_state" or name.endswith("__random_state")
    }

    # skip the set_params call entirely if there is nothing to seed
    if seeds:
        estimator.set_params(**seeds)
37 |
--------------------------------------------------------------------------------
/skpro/datatypes/_common.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Common private utilities for checkers and converters."""
3 |
4 | __author__ = ["fkiraly"]
5 |
6 |
7 | def _metadata_requested(return_metadata):
8 | """Return whether some metadata has been requested."""
9 | return not isinstance(return_metadata, bool) or return_metadata
10 |
11 |
def _ret(valid, msg, metadata, return_metadata):
    """Return switch for checker functions.

    Returns only ``valid`` if no metadata was requested,
    otherwise the triple ``(valid, msg, metadata)``.
    """
    if not _metadata_requested(return_metadata):
        return valid
    return valid, msg, metadata
18 |
19 |
20 | def _req(key, return_metadata):
21 | """Return whether metadata key is requested, boolean."""
22 | if isinstance(return_metadata, bool):
23 | return return_metadata
24 | elif isinstance(return_metadata, str) and not isinstance(key, list):
25 | return return_metadata == key
26 | elif isinstance(return_metadata, str) and isinstance(key, list):
27 | return return_metadata in key
28 | elif isinstance(return_metadata, list) and not isinstance(key, list):
29 | return key in return_metadata
30 | elif isinstance(return_metadata, list) and isinstance(key, list):
31 | return len(set(key).intersection(return_metadata)) > 0
32 | else:
33 | return False
34 |
35 |
def _wr(d, key, val, return_metadata):
    """Metadata write switch for checker functions.

    Writes ``val`` to ``d[key]`` in place if ``key`` is requested
    per ``return_metadata``. Returns ``d`` in either case.
    """
    if not _req(key, return_metadata):
        return d

    d[key] = val
    return d
42 |
--------------------------------------------------------------------------------
/docs/source/about/history.rst:
--------------------------------------------------------------------------------
1 | .. _history:
2 |
3 | =======
4 | History
5 | =======
6 |
7 | ``skpro`` was started in 2017 by Franz Király and his then-student Frithjof Gressmann
8 | as a `scikit-learn`_ like python package for probabilistic supervised regression.
9 |
10 | ``skpro`` was then abandoned, from 2019, at version 1.0.1, as development in
11 | Franz Király's research group continued to be focused on ``sktime``.
12 |
13 | In 2022-23, ``sktime``'s base module was turned into a separate package,
14 | `skbase`_, intended as a workbench to allow easy templating and creation of
15 | `scikit-learn`-likes.
16 |
17 | Using the templating scaffold of ``skbase``, ``skpro`` was finally revived
18 | in 2023 by Franz Király, Frithjof Gressmann, Anirban Ray, and Alex Gregory,
19 | built upon a fully re-architected, ``skbase``-reliant API,
20 | as version 2.0.0.
21 |
22 | The joint base interface enables mutual compatibility between ``skpro``, ``sklearn``,
23 | and ``sktime``, with ``skpro`` probabilistic regressors being potential components used
24 | for probabilistic forecasting in ``sktime``.
25 |
26 | Development is supported by members of the ``sktime`` project,
27 | new core developers and the broader community (see
28 | `contributors <https://github.com/sktime/skpro/blob/main/CONTRIBUTORS.md>`_).
29 |
30 | If you are interested in contributing, check out our
31 | :ref:`Contributing <contrib_guide>` guide.
32 |
33 | .. _scikit-learn: https://scikit-learn.org/stable/index.html
34 | .. _skbase: https://skbase.readthedocs.io/en/latest/
35 | .. _sktime: https://www.sktime.net/en/stable/index.html
36 |
--------------------------------------------------------------------------------
/skpro/registry/tests/test_scitype.py:
--------------------------------------------------------------------------------
1 | """Tests for scitype typing function."""
2 |
3 | import pytest
4 |
5 | from skpro.registry._scitype import scitype
6 |
7 |
@pytest.mark.parametrize("coerce_to_list", [True, False])
def test_scitype(coerce_to_list):
    """Test that the scitype function recovers the correct scitype(s).

    Covers a regressor class with soft dependencies, a regressor instance,
    and a distribution class, with and without coercion of the result to list.
    """
    from skpro.distributions.laplace import Laplace
    from skpro.regression.mapie import MapieRegressor
    from skpro.regression.residual import ResidualDouble

    def _check(obj, expected):
        """Assert that scitype of obj is expected, in the requested format."""
        result = scitype(obj, coerce_to_list=coerce_to_list)
        if coerce_to_list:
            assert isinstance(result, list)
            assert expected == result[0]
        else:
            assert expected == result

    # test that scitype works for classes with soft dependencies
    _check(MapieRegressor, "regressor_proba")

    # test that scitype works for instances
    _check(ResidualDouble.create_test_instance(), "regressor_proba")

    # test distribution object
    _check(Laplace, "distribution")
39 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## How to contribute to skpro
2 |
3 | #### **Did you find a bug?**
4 |
5 | * **Ensure the bug was not already reported** by searching on GitHub under [Issues](https://github.com/sktime/skpro/issues).
6 |
7 | * If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/sktime/skpro/issues/new). Be sure to include a **title and clear description**, as much relevant information as possible, and a **code sample** or an **executable test case** demonstrating the expected behavior that is not occurring.
8 |
9 | * Please follow the further discussion in case more information is needed or questions arise.
10 |
11 | #### **Did you write a patch that fixes a bug?**
12 |
13 | * Open a new GitHub pull request with the patch.
14 |
15 | * Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable.
16 |
17 | #### **Do you intend to add a new feature or change an existing one?**
18 |
19 | * Suggest your change in an issue and offer to implement the feature.
20 |
21 | * Wait for positive feedback in order to avoid double work (maybe your idea is already in development).
22 |
23 | * Implement and send a PR
24 |
25 | #### **Do you have questions about the source code?**
26 |
27 | * Ask any question about how to use skpro using the forum.
28 |
29 | #### **Do you want to contribute to the skpro documentation?**
30 |
31 | * Please send a PR that proposes changes to the docs directory.
32 |
33 | skpro is a team effort. We encourage you to pitch in and join us!
34 |
35 | Thanks! :heart: :heart: :heart:
36 |
37 | skpro Team
38 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2017 - 2023, The skpro developers.
4 |
5 | All rights reserved.
6 |
7 | Redistribution and use in source and binary forms, with or without
8 | modification, are permitted provided that the following conditions are met:
9 |
10 | * Redistributions of source code must retain the above copyright notice, this
11 | list of conditions and the following disclaimer.
12 |
13 | * Redistributions in binary form must reproduce the above copyright notice,
14 | this list of conditions and the following disclaimer in the documentation
15 | and/or other materials provided with the distribution.
16 |
17 | * Neither the name of the copyright holder nor the names of its
18 | contributors may be used to endorse or promote products derived from
19 | this software without specific prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/skpro/datatypes/_table/_base.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Base class for data types."""
3 |
4 | __author__ = ["fkiraly"]
5 |
6 | from skpro.datatypes._base import BaseDatatype
7 |
8 |
class BaseTable(BaseDatatype):
    """Base class for Table data types.

    All parameters are metadata fields that are inferred by check;
    they default to ``None`` and are stored on the instance unchanged.

    Parameters
    ----------
    is_univariate: bool
        True iff table has one variable
    is_empty: bool
        True iff table has no variables or no instances
    has_nans: bool
        True iff the table contains NaN values
    n_instances: int
        number of instances/rows in the table
    n_features: int
        number of variables in table
    feature_names: list of int or object
        names of variables in table
    """

    _tags = {
        "scitype": "Table",
        "name": None,  # any string
        "name_python": None,  # lower_snake_case
        "name_aliases": [],
        "python_version": None,
        "python_dependencies": None,
    }

    def __init__(
        self,
        is_univariate=None,
        is_empty=None,
        has_nans=None,
        n_instances=None,
        n_features=None,
        feature_names=None,
    ):
        # content flags
        self.is_univariate = is_univariate
        self.is_empty = is_empty
        self.has_nans = has_nans

        # dimensions and variable names
        self.n_instances = n_instances
        self.n_features = n_features
        self.feature_names = feature_names

        super().__init__()
56 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: "[BUG]"
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 |
14 |
15 | **To Reproduce**
16 |
21 |
22 |
23 | **Expected behavior**
24 |
27 |
28 |
29 | **Environment**
30 |
35 |
36 |
37 | **Additional context**
38 |
41 |
--------------------------------------------------------------------------------
/skpro/distributions/adapters/scipy/_empirical.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Empirical distribution."""
3 |
4 | __author__ = ["fkiraly"]
5 |
6 | import numpy as np
7 | import pandas as pd
8 |
9 |
def empirical_from_discrete(dist, index=None, columns=None):
    """Build an skpro Empirical object from a list of scipy discrete distributions.

    The support points ``xk`` of each distribution become weighted samples,
    with the probabilities ``pk`` as weights.

    Parameters
    ----------
    dist : list of rv_discrete
        List of scipy discrete distributions, instances of rv_discrete.
    index : pd.Index or coercible, optional
        Index of the resulting empirical distribution.
        Must be the same length as dist.
        If not passed, a ``pd.RangeIndex`` of length ``len(dist)`` is used.
    columns : pd.Index or coercible, optional
        Columns of the resulting empirical distribution.
        Must be of length 1.

    Returns
    -------
    Empirical
        Empirical distribution, constructed with ``time_indep=True``.
    """
    from skpro.distributions.empirical import Empirical

    if index is None:
        index = pd.RangeIndex(len(dist))

    supports = [d.xk for d in dist]
    probs = [d.pk for d in dist]
    n_points = [len(support) for support in supports]

    # second index level: label of distribution i, once per support point
    inst_level = np.concatenate(
        [np.repeat(index[i], n) for i, n in enumerate(n_points)]
    )
    # first index level: running number of the support point within distribution
    spl_level = np.concatenate([np.arange(n) for n in n_points])

    multi_idx = pd.MultiIndex.from_arrays([spl_level, inst_level])

    spl = pd.DataFrame(np.concatenate(supports), index=multi_idx, columns=columns)
    weights = pd.Series(np.concatenate(probs), index=multi_idx)

    return Empirical(
        spl=spl, weights=weights, time_indep=True, index=index, columns=columns
    )
47 |
--------------------------------------------------------------------------------
/skpro/utils/utils.py:
--------------------------------------------------------------------------------
1 | # LEGACY MODULE - TODO: remove or refactor
2 |
3 |
def not_existing(f):
    """Mark an interface method as theoretically non-existent.

    Sets the attribute ``not_existing = True`` on ``f`` in place.

    Parameters
    ----------
    f : callable
        Method to decorate

    Returns
    -------
    callable
        The same object ``f``, with the marker attribute set
    """
    setattr(f, "not_existing", True)
    return f
19 |
20 |
def ensure_existence(f):
    """Ensure that a method is not marked as non-existent.

    Parameters
    ----------
    f : callable
        Method to check for a truthy ``not_existing`` attribute

    Raises
    ------
    NotImplementedError
        if the method is marked as non-existent

    Returns
    -------
    callable
        Method ``f``, unchanged
    """
    marked_missing = getattr(f, "not_existing", False)
    if not marked_missing:
        return f

    message = (
        "The distribution has no " + f.__name__ + " function. "
        "You may use an adapter that supports its approximation."
    )
    raise NotImplementedError(message)
43 |
44 |
def to_percent(value, return_float=True):
    """Convert a value into a percent representation.

    Integers are read as percentage points and divided by 100,
    any other value is taken as a fraction. Non-positive values map to zero.

    Parameters
    ----------
    value : int or float
        Number representing a percentage
    return_float : bool, default=True
        If True, the fraction is returned as float.
        If False, the fraction times 100 is returned, rounded to int.

    Returns
    -------
    int or float
        A percentage
    """
    if isinstance(value, int):
        value = float(value) / 100.0

    # non-positive values are replaced by zero
    fraction = 0 if value <= 0 else value

    if return_float:
        return float(fraction)
    return int(round(fraction * 100))
71 |
--------------------------------------------------------------------------------
/skpro/distributions/adapters/statsmodels/_empirical.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Empirical distribution."""
3 |
4 | __author__ = ["fkiraly"]
5 |
6 | import numpy as np
7 | import pandas as pd
8 |
9 |
def empirical_from_rvdf(dist, index=None, columns=None):
    """Build an skpro Empirical object from a statsmodels rv_discrete_float.

    The entries ``dist.xk[i]`` become weighted samples of instance ``i``,
    with the matching entries ``dist.pk[i]`` as weights.

    Parameters
    ----------
    dist : rv_discrete_float object
        Instance of rv_discrete_float, with attributes ``xk`` and ``pk``.
    index : pd.Index or coercible, optional
        Index of the resulting empirical distribution.
        Must be the same length as dist.
        If not passed, a ``pd.RangeIndex`` of length ``len(dist)`` is used.
    columns : pd.Index or coercible, optional
        Columns of the resulting empirical distribution.
        Must be of length 1.

    Returns
    -------
    Empirical
        Empirical distribution, constructed with ``time_indep=True``.
    """
    from skpro.distributions.empirical import Empirical

    def _entries(arr):
        """Split arr into the list of its entries along the first axis."""
        return [arr[i] for i in range(len(arr))]

    if index is None:
        # NOTE(review): relies on dist supporting len() - confirm for
        # rv_discrete_float, or pass index explicitly
        index = pd.RangeIndex(len(dist))

    supports = _entries(dist.xk)
    probs = _entries(dist.pk)
    n_points = [len(support) for support in supports]

    # second index level: label of instance i, once per support point
    inst_level = np.concatenate(
        [np.repeat(index[i], n) for i, n in enumerate(n_points)]
    )
    # first index level: running number of the support point within instance
    spl_level = np.concatenate([np.arange(n) for n in n_points])

    multi_idx = pd.MultiIndex.from_arrays([spl_level, inst_level])

    spl = pd.DataFrame(np.concatenate(supports), index=multi_idx, columns=columns)
    weights = pd.Series(np.concatenate(probs), index=multi_idx)

    return Empirical(
        spl=spl, weights=weights, time_indep=True, index=index, columns=columns
    )
50 |
--------------------------------------------------------------------------------
/skpro/regression/base/adapters/_sklearn.py:
--------------------------------------------------------------------------------
1 | """Adapters to sklearn linear regressors with probabilistic components."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | __author__ = ["fkiraly"]
5 |
6 | from skpro.regression.base._delegate import _DelegatedProbaRegressor
7 |
8 |
class _DelegateWithFittedParamForwarding(_DelegatedProbaRegressor):
    """Common base class for delegates with attribute forwarding.

    Assumes that the delegate has an attribute ``estimator_`` after fitting.
    The attributes of ``estimator_`` named in ``self.FITTED_PARAMS_TO_FORWARD``
    are copied to self in ``_fit``.
    """

    # attribute for _DelegatedProbaRegressor, which then delegates
    # all non-overridden methods are same as of getattr(self, _delegate_name)
    # see further details in _DelegatedRegressor docstring
    _delegate_name = "_estimator"
    # _estimator, not estimator_, because we do not want to expose it as
    # fitted params - fitted params are instead forwarded

    def _fit(self, X, y):
        """Fit the delegate and forward its fitted attributes to self.

        Writes to self:
            Sets the attributes listed in ``self.FITTED_PARAMS_TO_FORWARD``,
            read from the ``estimator_`` attribute of the fitted delegate.

        Parameters
        ----------
        X : pandas DataFrame
            feature instances to fit regressor to
        y : pandas DataFrame, must be same length as X
            labels to fit regressor to

        Returns
        -------
        self : reference to self
        """
        delegate = self._get_delegate()
        delegate.fit(X=X, y=y)

        for name in self.FITTED_PARAMS_TO_FORWARD:
            fitted_value = getattr(delegate.estimator_, name)
            setattr(self, name, fitted_value)

        return self
47 |
--------------------------------------------------------------------------------
/.github/workflows/update_contributors.yml:
--------------------------------------------------------------------------------
name: Update Contributors

on:
  schedule:
    - cron: '0 0 * * 6'  # Sat 00:00 UTC (weekly)
  workflow_dispatch:

# The job pushes a branch and opens a pull request, so the workflow token
# needs write access to repository contents and pull requests.
permissions:
  contents: write
  pull-requests: write

jobs:
  generate-markdown-and-create-pr:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Set up Node
        uses: actions/setup-node@v6
        with:
          node-version: '18'

      - name: Install official all-contributors CLI
        run: npm install -g all-contributors-cli@6.24.0

      - name: Generate CONTRIBUTORS.md
        run: npx all-contributors generate

      - name: Commit and create PR if changed
        env:
          # gh picks the token up from the environment; an explicit
          # `gh auth login --with-token` errors out while GITHUB_TOKEN is set.
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          if ! git diff --quiet CONTRIBUTORS.md; then
            BRANCH="update-contributors-$(date +%F)"
            git checkout -b "$BRANCH"
            git add CONTRIBUTORS.md
            git commit -m "[MNT] all-contributors update"
            git push --set-upstream origin "$BRANCH"
            # create PR using gh (you can replace with actions/create-pull-request if preferred)
            gh pr create --title "[MNT] all-contributors update" \
              --body "Automated update to CONTRIBUTORS.md generated by workflow run ${{ github.run_id }}." \
              --head "$BRANCH" --base main
          else
            echo "No changes to CONTRIBUTORS.md"
          fi
47 |
--------------------------------------------------------------------------------
/docs/source/contribute.rst:
--------------------------------------------------------------------------------
1 | .. _contrib_guide:
2 |
3 | ============
4 | Get Involved
5 | ============
6 |
7 | ``skpro`` is a community-driven project and your help is extremely welcome. If you
8 | get stuck, please don't hesitate to chat with us or raise an issue.
9 |
10 | .. toctree::
11 | :maxdepth: 1
12 | :hidden:
13 |
14 | developer_guide
15 | contribute/team
16 | contribute/code_of_conduct
17 |
18 | .. grid:: 1 2 2 2
19 | :gutter: 3
20 |
21 | .. grid-item-card:: How to Contribute
22 | :text-align: center
23 |
24 | New to ``skpro``? Learn how you can contribute.
25 |
26 | +++
27 |
28 | .. button-ref:: contribute/how_to_contribute
29 | :color: primary
30 | :click-parent:
31 | :expand:
32 |
33 | Contribute
34 |
35 | .. grid-item-card:: Developer guide
36 | :text-align: center
37 |
38 | Help develop ``skpro``.
39 |
40 | +++
41 |
42 | .. button-ref:: developer_guide
43 | :color: primary
44 | :click-parent:
45 | :expand:
46 |
47 | Development
48 |
49 | .. grid-item-card:: Development Team
50 | :text-align: center
51 |
52 | Meet ``skpro``'s core development team.
53 |
54 | +++
55 |
56 | .. button-ref:: contribute/team
57 | :color: primary
58 | :click-parent:
59 | :expand:
60 |
61 | Development Team
62 |
63 | .. grid-item-card:: Code of Conduct
64 | :text-align: center
65 |
66 | Understand our code of conduct.
67 |
68 | +++
69 |
70 | .. button-ref:: contribute/code_of_conduct
71 | :color: primary
72 | :click-parent:
73 | :expand:
74 |
75 | Code of Conduct
76 |
--------------------------------------------------------------------------------
/docs/source/users.rst:
--------------------------------------------------------------------------------
1 | .. _user_documentation:
2 |
3 | =============
4 | Documentation
5 | =============
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 | :hidden:
10 |
11 | installation
12 | tutorials
13 | user_guide
14 | changelog
15 | related_software
16 |
17 |
18 | .. grid:: 1 2 2 2
19 | :gutter: 3
20 |
21 | .. grid-item-card:: Installation
22 | :text-align: center
23 |
24 | Install ``skpro``.
25 |
26 | +++
27 |
28 | .. button-ref:: installation
29 | :color: primary
30 | :click-parent:
31 | :expand:
32 |
33 | Installation
34 |
35 | .. grid-item-card:: Tutorials
36 | :text-align: center
37 |
38 | Introductory Tutorials.
39 |
40 | +++
41 |
42 | .. button-ref:: tutorials
43 | :color: primary
44 | :click-parent:
45 | :expand:
46 |
47 | Tutorials
48 |
49 | .. grid-item-card:: User Guide
50 | :text-align: center
51 |
52 | Learn about using ``skpro``.
53 |
54 | +++
55 |
56 | .. button-ref:: user_guide
57 | :color: primary
58 | :click-parent:
59 | :expand:
60 |
61 | User Guide
62 |
63 | .. grid-item-card:: Changelog
64 | :text-align: center
65 |
66 | Release notes and version history.
67 |
68 | +++
69 |
70 | .. button-ref:: changelog
71 | :color: primary
72 | :click-parent:
73 | :expand:
74 |
75 | Changelog
76 |
77 | .. grid-item-card:: Related Software
78 | :text-align: center
79 |
80 | Explore software related to ``skpro``.
81 |
82 | +++
83 |
84 | .. button-ref:: related_software
85 | :color: primary
86 | :click-parent:
87 | :expand:
88 |
89 | Related Software
90 |
--------------------------------------------------------------------------------
/docs/source/developer_guide.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _developer_guide:
3 |
4 | ===============
5 | Developer Guide
6 | ===============
7 |
8 | Welcome to skpro's developer guide!
9 |
10 | New developers should:
11 |
12 | * sign up to the developer Discord (see link in README) and say hello in the ``#contributors`` channel
13 | * install a development version of ``skpro``, see :ref:`installation`
14 | * set up CI tests locally and ensure they know how to check them remotely, see :ref:`continuous_integration`
15 | * get familiar with the git workflow (:ref:`git_workflow`) and coding standards (:ref:`coding_standards`)
16 | * feel free, at any point in time, to post questions on Discord, or ask core developers for help (see here for a :doc:`list of core developers <contribute/team>`)
17 |
18 | * feel free to join the collaborative coding sessions for pair programming or getting help on developer set-up
19 |
20 | Further special topics are listed below.
21 |
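22 | ``skpro`` follows `scikit-learn <https://scikit-learn.org/stable/index.html>`_\ 's API whenever possible.
23 | If you're new to scikit-learn, take a look at their `getting-started guide <https://scikit-learn.org/stable/getting_started.html>`_.
24 | If you're already familiar with scikit-learn, you may still learn something new from their `developers' guide <https://scikit-learn.org/stable/developers/index.html>`_.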
25 |
26 | .. toctree::
27 | :maxdepth: 1
28 |
29 | installation
30 | developer_guide/git_workflow
31 | developer_guide/continuous_integration
32 | developer_guide/coding_standards
33 | developer_guide/reviewer_guide
34 | developer_guide/add_estimators
35 | developer_guide/add_dataset
36 | developer_guide/deprecation
37 | developer_guide/dependencies
38 | developer_guide/documentation
39 | developer_guide/testing_framework
40 | developer_guide/release
41 | developer_guide/contrib_roadmap
42 | developer_guide/contrib_governance
43 |
--------------------------------------------------------------------------------
/skpro/utils/_maint/tests/test_show_versions.py:
--------------------------------------------------------------------------------
1 | """Tests for the show_versions utility."""
2 | import pathlib
3 | import uuid
4 |
5 | from skbase.utils.dependencies import _check_soft_dependencies
6 |
7 | from skpro.utils._maint._show_versions import (
8 | DEFAULT_DEPS_TO_SHOW,
9 | _get_deps_info,
10 | show_versions,
11 | )
12 |
13 |
def test_show_versions_runs():
    """Check that ``show_versions`` executes and returns nothing."""
    # the utility only prints, so its return value is expected to be None
    result = show_versions()
    assert result is None
18 |
19 |
def test_deps_info():
    """Check the package/version dict contract of ``_get_deps_info``."""
    # call without arguments
    # NOTE(review): the expected default key is "sktime" - confirm that this
    # matches the default used by skpro's ``_get_deps_info``.
    info_no_args = _get_deps_info()
    assert isinstance(info_no_args, dict)
    assert set(info_no_args) == {"sktime"}

    # call with the default dependency list
    info_default = _get_deps_info(DEFAULT_DEPS_TO_SHOW)
    assert isinstance(info_default, dict)
    assert set(info_default) == set(DEFAULT_DEPS_TO_SHOW)

    # import name -> distribution name, where the two differ
    KEY_ALIAS = {"sklearn": "scikit-learn", "skbase": "scikit-base"}

    for dep in DEFAULT_DEPS_TO_SHOW:
        dist_name = KEY_ALIAS.get(dep, dep)
        installed = _check_soft_dependencies(dist_name, severity="none")
        reported_version = info_default[dep]

        # a version is reported if and only if the package is installed
        assert (reported_version is None) != installed
        if installed:
            assert _check_soft_dependencies(f"{dist_name}=={reported_version}")

        # querying a single dependency returns exactly that key
        single_info = _get_deps_info([dep])
        assert set(single_info) == {dep}
40 |
41 |
def test_deps_info_deps_missing_package_present_directory():
    """Test that _get_deps_info does not fail if a dependency is missing.

    A directory with a random, non-installed package name is created in the
    current working directory, so that the name exists as a folder but not as
    an installed distribution. ``_get_deps_info`` is expected to report
    ``None`` as version for it.
    """
    dummy_package_name = uuid.uuid4().hex

    dummy_folder_path = pathlib.Path(dummy_package_name)
    dummy_folder_path.mkdir()

    try:
        assert _get_deps_info([dummy_package_name]) == {dummy_package_name: None}
    finally:
        # always remove the folder, also if the assertion above fails,
        # so a failing run does not leave stray directories behind
        dummy_folder_path.rmdir()
52 |
--------------------------------------------------------------------------------
/skpro/distributions/poisson.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Poisson probability distribution."""
3 |
4 | __author__ = ["fkiraly", "malikrafsan"]
5 |
6 | import pandas as pd
7 | from scipy.stats import poisson, rv_discrete
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class Poisson(_ScipyAdapter):
    """Poisson distribution.

    Most methods wrap ``scipy.stats.poisson``.

    Parameters
    ----------
    mu : float or array of float (1D or 2D)
        mean of the distribution
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions import Poisson

    >>> distr = Poisson(mu=[[1, 1], [2, 3], [4, 5]])
    """

    _tags = {
        "capabilities:approx": ["energy", "pdfnorm"],
        "capabilities:exact": ["mean", "var", "pmf", "log_pmf", "cdf", "ppf"],
        "distr:measuretype": "discrete",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, mu, index=None, columns=None):
        self.mu = mu

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_discrete:
        """Return the scipy distribution object that is wrapped."""
        return poisson

    def _get_scipy_param(self) -> tuple:
        """Return the scipy parameters as a pair ``(args, kwargs)``.

        Returns
        -------
        args : list
            positional parameters, here the broadcast ``mu``
        kwargs : dict
            keyword parameters, here empty
        """
        mu = self._bc_params["mu"]

        return [mu], {}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # 2D array-like parameter, default index and columns
        params1 = {"mu": [[1, 1], [2, 3], [4, 5]]}
        # scalar parameter with explicit index and columns
        params2 = {
            "mu": 0.1,
            "index": pd.Index([1, 2, 5]),
            "columns": pd.Index(["a", "b"]),
        }
        return [params1, params2]
62 |
--------------------------------------------------------------------------------
/skpro/regression/tests/test_ondil.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pytest
4 | from skbase.utils.dependencies import _check_soft_dependencies
5 |
6 | from skpro.regression.ondil import OndilOnlineGamlss
7 |
8 |
@pytest.mark.skipif(
    not _check_soft_dependencies(["ondil"], severity="none"),
    reason="skip test if ondil is not installed in environment",
)
def test_ondil_instantiation_and_get_test_params():
    """Smoke-test construction of the Ondil wrapper from its test params.

    Skipped when the optional dependency ``ondil`` is unavailable. Checks
    that ``get_test_params`` yields at least one parameter set and that the
    first one can be used to construct an ``OndilOnlineGamlss`` instance.
    """
    # skip at runtime as well, in case importing ondil itself fails
    pytest.importorskip("ondil")

    param_sets = OndilOnlineGamlss.get_test_params()
    # a single dict is treated as a list with one parameter set
    if isinstance(param_sets, dict):
        param_sets = [param_sets]
    assert len(param_sets) >= 1

    first_set = param_sets[0]
    instance = OndilOnlineGamlss(**first_set)
    assert isinstance(instance, OndilOnlineGamlss)
31 |
32 |
@pytest.mark.skipif(
    not _check_soft_dependencies(["ondil"], severity="none"),
    reason="skip test if ondil is not installed in environment",
)
def test_ondil_fit_smoke():
    """Fit the Ondil wrapper on a three-row data set as a wiring check.

    Smoke test only: it verifies that ``fit`` runs on minimal data and that
    the estimator reports itself as fitted afterwards.
    """
    # minimal data: one feature column, one target column, three rows
    features = pd.DataFrame({"a": [0.0, 1.0, 2.0]})
    target = pd.DataFrame(np.array([[0.1], [1.1], [1.9]]))

    # skip at runtime as well, in case importing ondil itself fails
    pytest.importorskip("ondil")

    regressor = OndilOnlineGamlss()

    # no exception handling on purpose: errors raised by the wrapped package
    # should surface here so that the wrapper can be adapted
    regressor.fit(features, target)
    assert regressor.is_fitted
56 |
--------------------------------------------------------------------------------
/docs/source/developer_guide/reviewer_guide.rst:
--------------------------------------------------------------------------------
1 | .. _reviewer_guide:
2 | .. _rev_guide:
3 |
4 | ==============
5 | Reviewer Guide
6 | ==============
7 |
8 | Pull Request reviewers play an important role in ``skpro``'s development.
9 |
10 | .. warning::
11 |
12 | The reviewer guide is under development. If you have suggestions, open an
13 | issue or Pull Request.
14 |
15 |
16 | Triage
17 | ======
18 |
19 | * Assign relevant labels
20 | * Assign to relevant project board
21 | * Title: Is it using the 3-letter codes? Is it understandable?
22 | * Description: Is it understandable? Any related issues/PRs?
23 | * CI checks: approval for first-time contributors, any help needed with
24 | code/doc quality checks?
25 | * Merge conflicts
26 |
27 | Code Review
28 | ===========
29 |
30 | * Unit testing:
31 |
32 | - Are the code changes tested?
33 | - Are the tests understandable?
34 | - Are all changes covered by tests? We usually aim for a test coverage of
35 | at least 90%.
36 | - Code coverage will be reported as part of the automated CI checks on GitHub
37 |
38 | * Test changes locally: Does everything work as expected?
39 | * Deprecation warnings:
40 |
41 | - Has the public API changed?
42 | - Have deprecation warnings been added before making the changes?
43 |
44 | .. _reviewer_guide_doc:
45 |
46 | Documentation Review
47 | ====================
48 |
49 | * Are the docstrings complete and understandable to users?
50 | * Do they follow the NumPy format and ``skbase`` conventions?
51 | * If the same parameter, attribute, return object or error is included elsewhere
52 | in ``skpro`` are the docstring descriptions as similar as possible?
53 | * Does the online documentation render correctly with the changes?
54 | * Do the docstrings contain links to the relevant topics in the
55 | :ref:`glossary` or :ref:`user_guide`?
56 |
57 | .. note::
58 |
59 | If a Pull Request does not meet ``skpro``'s
60 | :doc:`documentation guide </developer_guide/documentation>` a reviewer should
61 | require the documentation be updated prior to approving the Pull Request.
62 |
--------------------------------------------------------------------------------
/docs/source/api_reference/metrics.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _metrics_ref:
3 |
4 | Performance metrics
5 | ===================
6 |
7 | The :mod:`skpro.metrics` module contains metrics for evaluating
8 | probabilistic predictions, including survival and time-to-event predictions.
9 |
10 | All metrics in ``skpro`` can be listed using the ``skpro.registry.all_objects`` utility,
11 | using ``object_types="metric"``, optionally filtered by tags.
12 | Valid tags can be listed using ``skpro.registry.all_tags``.
13 |
14 | Survival/time-to-event specific metrics in ``skpro`` can be listed
15 | by filtering by ``capability:survival`` being ``True``.
16 |
17 | All probabilistic metrics can be used for survival
18 | prediction, by default they will ignore the censoring information.
19 | Note: this is different from subsetting to uncensored observations.
20 |
21 |
22 | Quantile and interval prediction metrics
23 | ----------------------------------------
24 |
25 | .. currentmodule:: skpro.metrics
26 |
27 | .. autosummary::
28 | :toctree: auto_generated/
29 | :template: class_with_call.rst
30 |
31 | PinballLoss
32 | EmpiricalCoverage
33 | ConstraintViolation
34 | IntervalWidth
35 |
36 | Distribution prediction metrics
37 | -------------------------------
38 |
39 | Distribution predictions are also known as conditional distribution predictions
40 | (or conditional density predictions, if continuous).
41 |
42 | .. currentmodule:: skpro.metrics
43 |
44 | .. autosummary::
45 | :toctree: auto_generated/
46 | :template: class_with_call.rst
47 |
48 | CRPS
49 | LogLoss
50 | LinearizedLogLoss
51 | SquaredDistrLoss
52 | AUCalibration
53 |
54 | Survival prediction metrics
55 | ---------------------------
56 |
57 | Survival or time-to-event predictions are a variant of distribution predictions,
58 | where the ground truth may be censored.
59 | These metrics take the censoring information into account.
60 |
61 | .. currentmodule:: skpro.metrics.survival
62 |
63 | .. autosummary::
64 | :toctree: auto_generated/
65 | :template: class_with_call.rst
66 |
67 | ConcordanceHarrell
68 | SPLL
69 |
--------------------------------------------------------------------------------
/skpro/tests/_test_vm.py:
--------------------------------------------------------------------------------
1 | """Auxiliary script to test an estimator in its own virtual machine."""
2 |
3 | __all__ = ["run_test_vm"]
4 |
5 | import os
6 | import platform
7 |
8 | from skbase.utils.dependencies import _check_estimator_deps, _check_soft_dependencies
9 |
10 |
def run_test_vm(cls_name):
    """Run ``check_estimator`` for a single estimator, identified by name.

    The string ``cls_name`` is resolved to a class via ``skpro.registry.craft``
    and ``check_estimator`` is run on it with ``raise_exceptions=True``.
    Tests listed in the class tag ``tests:skip_by_name`` are excluded.

    The check is not run, and a message is printed instead, if
    ``_check_estimator_deps`` reports that the estimator's dependencies
    are not satisfied in the current environment.

    This function is intended to be invoked in an environment set up
    specifically for the estimator under test.

    Parameters
    ----------
    cls_name : str
        Name of the estimator class to test, as understood by ``craft``.

    Raises
    ------
    Exception
        if the ``check_estimator`` fails, or if the estimator is not found.
    """
    from skpro.registry import craft
    from skpro.utils import check_estimator

    # disable mps for macos runners if torch is available
    if _check_soft_dependencies("torch", severity="none") and (
        platform.system() == "Darwin"
    ):
        import torch

        torch.backends.mps.is_available = lambda: False

    # to allow hf-xet to download models on macos runners on version `latest`
    if _check_soft_dependencies("hf-xet", severity="none") and (
        platform.system() == "Darwin"
    ):
        os.environ["HF_XET_NUM_CONCURRENT_RANGE_GETS"] = "4"

    estimator_cls = craft(cls_name)

    if not _check_estimator_deps(estimator_cls, severity="none"):
        print(  # noqa: T201
            f"Skipping estimator: {estimator_cls} due to incompatibility "
            "with python or OS version."
        )  # noqa: T201
        return

    excluded_tests = estimator_cls.get_class_tag("tests:skip_by_name", None)
    check_estimator(
        estimator_cls, raise_exceptions=True, tests_to_exclude=excluded_tests
    )
56 |
--------------------------------------------------------------------------------
/skpro/utils/_doctest.py:
--------------------------------------------------------------------------------
1 | """Doctest utilities."""
2 | # copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | import contextlib
5 | import doctest
6 | import io
7 |
8 |
def run_doctest(
    f,
    verbose=False,
    name=None,
    compileflags=None,
    optionflags=doctest.ELLIPSIS,
    raise_on_error=True,
):
    """Run doctests for a given function or class, and return or raise.

    Everything that ``doctest`` prints while running the examples in the
    docstring of ``f`` is captured. Any captured output is treated as a
    failure; note that with ``verbose=True`` passing examples are printed
    as well and hence also count as output.

    Parameters
    ----------
    f : callable
        Function or class to run doctests for.
    verbose : bool, optional (default=False)
        If True, print the results of the doctests.
    name : str, optional (default=f.__name__, if available, otherwise "NoName")
        Name of the function or class.
    compileflags : int, optional (default=None)
        Flags to pass to the Python parser.
    optionflags : int, optional (default=doctest.ELLIPSIS)
        Flags to control the behaviour of the doctest.
    raise_on_error : bool, optional (default=True)
        If True, raise an exception if the doctests fail.

    Returns
    -------
    doctest_output : str
        Output of the doctests.

    Raises
    ------
    RuntimeError
        If raise_on_error=True and the doctests fail.
    """
    # resolve the default name *before* running the examples: doctest raises
    # a ValueError if name is None and the object has no __name__ attribute
    if name is None:
        name = getattr(f, "__name__", "NoName")

    doctest_output_io = io.StringIO()
    with contextlib.redirect_stdout(doctest_output_io):
        doctest.run_docstring_examples(
            f=f,
            globs=globals(),
            verbose=verbose,
            name=name,
            compileflags=compileflags,
            optionflags=optionflags,
        )
    doctest_output = doctest_output_io.getvalue()

    if raise_on_error and len(doctest_output) > 0:
        raise RuntimeError(
            f"Docstring examples failed doctests "
            f"for {name}, doctest output: {doctest_output}"
        )
    return doctest_output
65 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for easier installation and cleanup.
2 | #
3 | # Uses self-documenting macros from here:
4 | # http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html
5 |
PACKAGE=skpro
DOC_DIR=./docs
BUILD_TOOLS=./build_tools
TEST_DIR=testdir

# All targets below are commands, not files. In particular "build" must be
# phony, otherwise an existing ./build directory makes "make build" a no-op.
.PHONY: help release install test tests test_check_suite test_softdeps_full \
	lint clean dist build doc docs nb

.DEFAULT_GOAL := help

# Print every target that carries a "## description" comment.
help:
	@grep -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) |\
		awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}'

test: ## Run unit tests
	-rm -rf ${TEST_DIR}
	mkdir -p ${TEST_DIR}
	cp .coveragerc ${TEST_DIR}
	cp setup.cfg ${TEST_DIR}
	python -m pytest

test_check_suite: ## run only estimator contract tests in TestAll classes
	-rm -rf ${TEST_DIR}
	mkdir -p ${TEST_DIR}
	cp .coveragerc ${TEST_DIR}
	cp setup.cfg ${TEST_DIR}
	python -m pytest -k 'TestAll' $(PYTESTOPTIONS)

# NOTE(review): every recipe line runs in its own shell, so the "cd" below
# does not affect the pytest call on the following line - confirm whether
# pytest is meant to run from ${TEST_DIR} or from the repository root.
test_softdeps_full: ## Run all non-suite unit tests without soft dependencies
	-rm -rf ${TEST_DIR}
	mkdir -p ${TEST_DIR}
	cp setup.cfg ${TEST_DIR}
	cd ${TEST_DIR}
	python -m pytest -v --showlocals --durations=20 -k 'not TestAll' $(PYTESTOPTIONS)

tests: test

clean: ## Clean build dist and egg directories left after install
	rm -rf ./dist
	rm -rf ./build
	rm -rf ./.pytest_cache
	rm -rf ./htmlcov
	rm -rf ./junit
	rm -rf ./$(PACKAGE).egg-info
	rm -rf coverage.xml
	rm -f MANIFEST
	rm -rf ./wheelhouse/*
	find . -type f -iname "*.so" -delete
	find . -type f -iname '*.pyc' -delete
	find . -type d -name '__pycache__' -empty -delete

dist: ## Make Python source distribution
	python3 setup.py sdist bdist_wheel

build:
	python -m build --sdist --wheel --outdir wheelhouse

docs: doc

doc: ## Build documentation with Sphinx
	$(MAKE) -C $(DOC_DIR) html

nb: clean
	rm -rf .venv || true
	python3 -m venv .venv
	. .venv/bin/activate && python -m pip install .[all_extras,binder] && ./build_tools/run_examples.sh
72 |
--------------------------------------------------------------------------------
/skpro/regression/tests/test_glm.py:
--------------------------------------------------------------------------------
1 | """Tests Generalized Linear Model regressor."""
2 |
3 | import pandas as pd
4 | import pytest
5 |
6 | from skpro.regression.linear import GLMRegressor
7 | from skpro.tests.test_switch import run_test_for_class
8 |
9 |
@pytest.mark.skipif(
    not run_test_for_class(GLMRegressor),
    reason="run test only if softdeps are present and incrementally (if requested)",
)
def test_glm_simple_use():
    """Fit a default GLM regressor and check prediction shapes."""
    from sklearn.datasets import load_diabetes
    from sklearn.model_selection import train_test_split

    # first 200 rows of the diabetes data, target as a one-column DataFrame
    features, target = load_diabetes(return_X_y=True, as_frame=True)
    features = features.iloc[:200]
    target = pd.DataFrame(target).iloc[:200]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, random_state=0
    )

    regressor = GLMRegressor()
    regressor.fit(X_train, y_train)
    point_pred = regressor.predict(X_test)
    proba_pred = regressor.predict_proba(X_test)

    # both point and distribution predictions align with the test labels
    assert point_pred.shape == y_test.shape
    assert proba_pred.shape == y_test.shape
32 |
33 |
@pytest.mark.skipif(
    not run_test_for_class(GLMRegressor),
    reason="run test only if softdeps are present and incrementally (if requested)",
)
def test_glm_with_offset_exposure():
    """Test GLM with offset_var and exposure_var parameters.

    Two extra columns, ``"off"`` and ``"exp"``, are appended to the features;
    the regressor is constructed with ``offset_var="off"`` and
    ``exposure_var=-1``, and the shapes of the predictions are checked.
    """
    import numpy as np
    from sklearn.datasets import load_diabetes
    from sklearn.model_selection import train_test_split

    X, y = load_diabetes(return_X_y=True, as_frame=True)
    y = pd.DataFrame(y)
    # explicit copy: columns are added below, and assigning to a slice of the
    # original frame would be chained assignment
    X = X.iloc[:200].copy()
    y = y.iloc[:200]
    X["off"] = np.ones(X.shape[0]) * 2.1
    X["exp"] = np.arange(1, X.shape[0] + 1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    glm_reg = GLMRegressor(
        family="Normal", link="Log", offset_var="off", exposure_var=-1
    )
    glm_reg.fit(X_train, y_train)
    y_pred = glm_reg.predict(X_test)
    y_pred_proba = glm_reg.predict_proba(X_test)

    assert y_pred.shape == y_test.shape
    assert y_pred_proba.shape == y_test.shape
61 |
--------------------------------------------------------------------------------
/docs/source/api_reference/utils.rst:
--------------------------------------------------------------------------------
1 | .. _utils_ref:
2 |
3 | Utility functions
4 | =================
5 |
6 | ``skpro`` has a number of modules dedicated to utilities:
7 |
8 | * :mod:`skpro.datatypes`, which contains utilities for data format checks and conversion.
9 | * :mod:`skpro.registry`, which contains utilities for estimator and tag search
10 | * :mod:`skpro.utils`, which contains generic utility functions.
11 |
12 |
13 | Data Format Checking and Conversion
14 | -----------------------------------
15 |
16 | :mod:`skpro.datatypes`
17 |
18 | .. automodule:: skpro.datatypes
19 | :no-members:
20 | :no-inherited-members:
21 |
22 | .. currentmodule:: skpro.datatypes
23 |
24 | .. autosummary::
25 | :toctree: auto_generated/
26 | :template: function.rst
27 |
28 | convert_to
29 | convert
30 | check_raise
31 | check_is_mtype
32 | check_is_scitype
33 | mtype
34 | scitype
35 | mtype_to_scitype
36 | scitype_to_mtype
37 |
38 |
39 | Estimator Search and Retrieval, Estimator Tags
40 | ----------------------------------------------
41 |
42 | :mod:`skpro.registry`
43 |
44 | .. automodule:: skpro.registry
45 | :no-members:
46 | :no-inherited-members:
47 |
48 | .. currentmodule:: skpro.registry
49 |
50 | .. autosummary::
51 | :toctree: auto_generated/
52 | :template: function.rst
53 |
54 | all_objects
55 | all_tags
56 | check_tag_is_valid
57 |
58 |
59 | Plotting
60 | --------
61 |
62 | :mod:`skpro.utils.plotting`
63 |
64 | .. automodule:: skpro.utils.plotting
65 | :no-members:
66 | :no-inherited-members:
67 |
68 | .. currentmodule:: skpro.utils.plotting
69 |
70 | .. autosummary::
71 | :toctree: auto_generated/
72 | :template: function.rst
73 |
74 | plot_crossplot_interval
75 | plot_crossplot_std
76 | plot_crossplot_loss
77 |
78 |
79 | Estimator Validity Checking
80 | ---------------------------
81 |
82 | :mod:`skpro.utils.estimator_checks`
83 |
84 | .. automodule:: skpro.utils.estimator_checks
85 | :no-members:
86 | :no-inherited-members:
87 |
88 | .. currentmodule:: skpro.utils.estimator_checks
89 |
90 | .. autosummary::
91 | :toctree: auto_generated/
92 | :template: function.rst
93 |
94 | check_estimator
95 |
--------------------------------------------------------------------------------
/skpro/distributions/tests/test_qpd.py:
--------------------------------------------------------------------------------
1 | """Tests for quantile-parameterized distributions."""
2 |
3 | import numpy as np
4 | import pytest
5 |
6 | from skpro.distributions.qpd import QPD_B, QPD_S, QPD_U
7 | from skpro.tests.test_switch import run_test_for_class
8 |
9 |
@pytest.mark.skipif(
    not run_test_for_class(QPD_B),
    reason="run test only if softdeps are present and incrementally (if requested)",
)
def test_qpd_b_simple_use():
    """Construct a bounded QPD and check that its mean can be computed."""
    bounded_params = {
        "alpha": 0.2,
        "qv_low": [1, 2],
        "qv_median": [3, 4],
        "qv_high": [5, 6],
        "lower": 0,
        "upper": 10,
    }
    distr = QPD_B(**bounded_params)

    # smoke check only: the call must not raise
    distr.mean()
26 |
27 |
@pytest.mark.skipif(
    not run_test_for_class(QPD_B),
    reason="run test only if softdeps are present and incrementally (if requested)",
)
def test_qpd_b_pdf():
    """Check the pdf of a bounded QPD against a known constant density."""
    # these parameters should produce a uniform on -0.5, 0.5
    uniform_like = QPD_B(
        alpha=0.2,
        qv_low=-0.3,
        qv_median=0,
        qv_high=0.3,
        lower=-0.5,
        upper=0.5,
    )

    # evaluate the density point by point, strictly inside the support
    grid = np.linspace(-0.45, 0.45, 100)
    densities = [uniform_like.pdf(point) for point in grid]

    # a uniform on an interval of length one has density one everywhere
    np.testing.assert_allclose(densities, 1.0, rtol=1e-5)
46 |
47 |
@pytest.mark.skipif(
    not run_test_for_class(QPD_S),
    reason="run test only if softdeps are present and incrementally (if requested)",
)
def test_qpd_s_simple_use():
    """Construct a semi-bounded QPD and check that its mean can be computed."""
    semi_bounded_params = {
        "alpha": 0.2,
        "qv_low": [1, 2],
        "qv_median": [3, 4],
        "qv_high": [5, 6],
        "lower": 0,
    }
    distr = QPD_S(**semi_bounded_params)

    # smoke check only: the call must not raise
    distr.mean()
63 |
64 |
@pytest.mark.skipif(
    not run_test_for_class(QPD_U),
    reason="run test only if softdeps are present and incrementally (if requested)",
)
def test_qpd_u_simple_use():
    """Construct an unbounded QPD and check that its mean can be computed."""
    unbounded_params = {
        "alpha": 0.2,
        "qv_low": [1, 2],
        "qv_median": [3, 4],
        "qv_high": [5, 6],
    }
    distr = QPD_U(**unbounded_params)

    # smoke check only: the call must not raise
    distr.mean()
79 |
--------------------------------------------------------------------------------
/skpro/distributions/negative_binomial.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Negative binomial probability distribution."""
3 |
4 | __author__ = ["tingiskhan"]
5 |
6 | import pandas as pd
7 | from numpy.typing import ArrayLike
8 | from scipy.stats import nbinom, rv_discrete
9 |
10 | from skpro.distributions.adapters.scipy import _ScipyAdapter
11 |
12 |
class NegativeBinomial(_ScipyAdapter):
    """Negative binomial distribution.

    Most methods wrap ``scipy.stats.nbinom``.

    The distribution is parameterized by mean ``mu`` and ``alpha``, which are
    mapped to the ``scipy.stats.nbinom`` parameters as ``n = alpha`` and
    ``p = alpha / (alpha + mu)``. Under this mapping the variance is
    ``mu + mu ** 2 / alpha``.

    Parameters
    ----------
    mu : ArrayLike
        mean of the distribution.
    alpha : ArrayLike
        dispersion of distribution.

    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions import NegativeBinomial

    >>> distr = NegativeBinomial(mu=1.0, alpha=1.0)
    """

    _tags = {
        "capabilities:approx": ["energy"],
        "capabilities:exact": ["mean", "var", "pmf", "log_pmf", "cdf", "ppf"],
        "distr:measuretype": "discrete",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, mu: ArrayLike, alpha: ArrayLike, index=None, columns=None):
        self.mu = mu
        self.alpha = alpha

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_discrete:
        """Return the scipy distribution object that is wrapped."""
        return nbinom

    def _get_scipy_param(self) -> tuple:
        """Return the scipy parameters as a pair ``(args, kwargs)``.

        Returns
        -------
        args : list
            positional parameters ``[n, p]`` of ``scipy.stats.nbinom``
        kwargs : dict
            keyword parameters, here empty
        """
        mu = self._bc_params["mu"]
        alpha = self._bc_params["alpha"]

        # translate (mu, alpha) to scipy's (n, p) parameterization
        n = alpha
        p = alpha / (alpha + mu)

        return [n, p], {}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # 2D array-like mean with scalar alpha, default index and columns
        params1 = {"mu": [[1, 1], [2, 3], [4, 5]], "alpha": 2.0}
        # scalar parameters with explicit index and columns
        params2 = {
            "mu": 1.0,
            "alpha": 2.0,
            "index": pd.Index([1, 2, 5]),
            "columns": pd.Index(["a", "b"]),
        }
        return [params1, params2]
72 |
--------------------------------------------------------------------------------
/skpro/distributions/geometric.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Geometric probability distribution."""
3 |
4 | __author__ = ["aryabhatta-dey"]
5 |
6 | import pandas as pd
7 | from scipy.stats import geom, rv_discrete
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class Geometric(_ScipyAdapter):
    r"""Geometric probability distribution.

    Adapter class; most methods wrap ``scipy.stats.geom``.

    The only parameter is the probability of success :math:`p` of a single
    trial. The probability mass function (PMF) is:

    .. math:: P(X = k) = p(1 - p)^{k - 1} \quad \text{where} \quad k = 1, 2, 3, \ldots

    Parameters
    ----------
    p : float or array of float (1D or 2D), must be in (0, 1]
        probability of success in a single trial
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.geometric import Geometric
    >>> d = Geometric(p=0.5)
    """

    _tags = {
        "capabilities:approx": ["pmf"],
        "capabilities:exact": ["mean", "var", "pmf", "log_pmf", "cdf", "ppf"],
        "distr:measuretype": "discrete",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, p, index=None, columns=None):
        self.p = p

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_discrete:
        """Return the wrapped scipy distribution, ``scipy.stats.geom``."""
        return geom

    def _get_scipy_param(self):
        """Return positional (none) and keyword (``p``) arguments for ``geom``."""
        scipy_kwargs = {"p": self._bc_params["p"]}
        return [], scipy_kwargs

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # array-valued parameter
        array_case = {"p": [0.2, 0.5, 0.8]}
        # scalar parameter with explicit index and columns
        indexed_case = dict(
            p=0.4,
            index=pd.Index([1, 2, 5]),
            columns=pd.Index(["a", "b"]),
        )
        # plain scalar parameter
        scalar_case = {"p": 0.7}

        return [array_case, indexed_case, scalar_case]
71 |
--------------------------------------------------------------------------------
/skpro/registry/_scitype.py:
--------------------------------------------------------------------------------
1 | """Utility to determine scitype of estimator, based on base class type."""
2 |
3 | __author__ = ["fkiraly"]
4 |
5 | from inspect import isclass
6 |
7 |
def scitype(obj, force_single_scitype=True, coerce_to_list=False):
    """Determine scitype string of obj.

    The scitype is read from the ``"object_type"`` tag of ``obj``.
    If ``obj`` has no ``get_tag`` attribute, or the tag is ``None``,
    the scitype falls back to ``"object"``.

    Parameters
    ----------
    obj : class or object
        if it has a ``get_tag`` attribute, the ``"object_type"`` tag is read via
        ``get_class_tag`` (for classes) or ``get_tag`` (for instances)
    force_single_scitype : bool, optional, default = True
        whether only a single scitype is returned
        if True, only the *first* scitype in the tag value will be returned
    coerce_to_list : bool, optional, default = False
        whether return should be coerced to list, even if only one scitype is identified

    Returns
    -------
    scitype : str, or list of str
        str, if exactly one scitype can be determined for obj
            or force_single_scitype is True, and if coerce_to_list is False
        list of str, of scitype strings, if more than one scitype are determined,
            or if coerce_to_list is True

    Raises
    ------
    TypeError if the tag value is an empty list, i.e., no scitype can be determined
    """
    # default for objects without tag interface; avoids an unbound name below
    tag_type = None

    if hasattr(obj, "get_tag"):
        if isclass(obj):
            tag_type = obj.get_class_tag("object_type", None)
        else:
            tag_type = obj.get_tag("object_type", None, raise_error=False)

    if tag_type is None:
        # no tag interface, or tag not set: generic fallback
        scitypes = ["object"]
    elif coerce_to_list and not isinstance(tag_type, list):
        scitypes = [tag_type]
    else:
        scitypes = tag_type

    if isinstance(scitypes, list) and len(scitypes) == 0:
        raise TypeError("Error, no scitype could be determined for obj")

    if isinstance(scitypes, list) and force_single_scitype:
        # build a new list so that the tag value itself is not mutated
        scitypes = [scitypes[0]]

    if isinstance(scitypes, list) and len(scitypes) == 1 and not coerce_to_list:
        return scitypes[0]

    return scitypes
58 |
--------------------------------------------------------------------------------
/docs/source/about.rst:
--------------------------------------------------------------------------------
1 | .. _top_level_about:
2 |
3 | =====
4 | About
5 | =====
6 |
7 | Learn more about the ``skpro`` project and its community.
8 |
9 | .. toctree::
10 | :maxdepth: 1
11 | :hidden:
12 |
13 | about/mission
14 | about/history
15 | about/team
16 | about/contributors
17 | about/governance
18 | about/roadmap
19 |
20 | .. grid:: 1 2 2 2
21 | :gutter: 3
22 |
23 | .. grid-item-card:: Mission
24 | :text-align: center
25 |
26 | ``skpro``'s mission.
27 |
28 | +++
29 |
30 | .. button-ref:: about/mission
31 | :color: primary
32 | :click-parent:
33 | :expand:
34 |
35 | Mission
36 |
37 | .. grid-item-card:: History
38 | :text-align: center
39 |
40 | Learn about ``skpro``'s history.
41 |
42 | +++
43 |
44 | .. button-ref:: about/history
45 | :color: primary
46 | :click-parent:
47 | :expand:
48 |
49 | History
50 |
51 | .. grid-item-card:: Development Team
52 | :text-align: center
53 |
54 | ``skpro``'s core development team.
55 |
56 | +++
57 |
58 | .. button-ref:: about/team
59 | :color: primary
60 | :click-parent:
61 | :expand:
62 |
63 | Development Team
64 |
65 | .. grid-item-card:: Contributors
66 | :text-align: center
67 |
68 | All of ``skpro``'s contributors.
69 |
70 | +++
71 |
72 | .. button-ref:: about/contributors
73 | :color: primary
74 | :click-parent:
75 | :expand:
76 |
77 | Contributors
78 |
79 | .. grid-item-card:: Governance
80 | :text-align: center
81 |
82 | How we govern the project.
83 |
84 | +++
85 |
86 | .. button-ref:: about/governance
87 | :color: primary
88 | :click-parent:
89 | :expand:
90 |
91 | Governance
92 |
93 | .. grid-item-card:: Roadmap
94 | :text-align: center
95 |
96 | Where we plan to take ``skpro``.
97 |
98 | +++
99 |
100 | .. button-ref:: about/roadmap
101 | :color: primary
102 | :click-parent:
103 | :expand:
104 |
105 | Roadmap
106 |
--------------------------------------------------------------------------------
/skpro/distributions/base/tests/test_multiindex.py:
--------------------------------------------------------------------------------
1 | """Test cases for the MultiIndex functionality of the BaseDistribution.
2 |
3 | Uses the Normal distribution, but is intended to trigger the base layer.
4 | """
5 |
6 | import numpy as np
7 | import pandas as pd
8 | import pytest
9 |
10 | from skpro.distributions.normal import Normal
11 |
12 |
@pytest.fixture
def normal_dist():
    """Return a Normal distribution with four MultiIndex rows and two columns."""
    means = np.array([[1, 2], [2, 3], [4, 5], [6, 7]])
    row_index = pd.MultiIndex.from_product([(1, 2), (2, 3)])
    return Normal(means, 2, index=row_index)
17 |
18 |
def test_loc_partial_level(normal_dist):
    """Check ``loc`` with a first-level key only: two rows are selected."""
    subset = normal_dist.loc[1]
    want_index = pd.MultiIndex.from_tuples([(1, 2), (1, 3)])
    np.testing.assert_array_equal(subset.index, want_index)
    assert subset.mean().shape == (2, 2)
24 |
25 |
def test_loc_full_tuple(normal_dist):
    """Check ``loc`` with a full index tuple: a single row is selected."""
    subset = normal_dist.loc[(2, 2)]
    want_index = pd.MultiIndex.from_tuples([(2, 2)])
    np.testing.assert_array_equal(subset.index, want_index)
    assert subset.mean().shape == (1, 2)
31 |
32 |
def test_loc_list_of_keys(normal_dist):
    """Check ``loc`` with a list of full index tuples: those rows are selected."""
    keys = [(1, 2), (2, 3)]
    subset = normal_dist.loc[keys]
    want_index = pd.MultiIndex.from_tuples([(1, 2), (2, 3)])
    np.testing.assert_array_equal(subset.index, want_index)
    assert subset.mean().shape == (2, 2)
38 |
39 |
def test_iloc_single_row(normal_dist):
    """Check ``iloc`` with one integer: the first row is selected."""
    subset = normal_dist.iloc[0]
    want_index = pd.MultiIndex.from_tuples([(1, 2)])
    np.testing.assert_array_equal(subset.index, want_index)
    assert subset.mean().shape == (1, 2)
45 |
46 |
def test_iloc_multiple_rows(normal_dist):
    """Check ``iloc`` with a list of integers: first and last row are selected."""
    positions = [0, 3]
    subset = normal_dist.iloc[positions]
    want_index = pd.MultiIndex.from_tuples([(1, 2), (2, 3)])
    np.testing.assert_array_equal(subset.index, want_index)
    assert subset.mean().shape == (2, 2)
52 |
53 |
def test_iloc_column_slice(normal_dist):
    """Check ``iloc`` selecting one column: all rows and the row index are kept."""
    subset = normal_dist.iloc[:, 1]
    want_index = normal_dist.index
    assert subset.mean().shape == (4, 1)
    np.testing.assert_array_equal(subset.index, want_index)
59 |
60 |
def test_loc_row_col(normal_dist):
    """Check ``loc`` with a row tuple and a full column slice: one row is selected."""
    subset = normal_dist.loc[(1, 2), :]
    want_index = pd.MultiIndex.from_tuples([(1, 2)])
    assert subset.mean().shape == (1, 2)
    np.testing.assert_array_equal(subset.index, want_index)
66 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v4.4.0
4 | hooks:
5 | - id: check-added-large-files
6 | args: ["--maxkb=1000"]
7 | - id: check-case-conflict
8 | - id: check-merge-conflict
9 | - id: check-symlinks
10 | - id: check-yaml
11 | - id: debug-statements
12 | - id: end-of-file-fixer
13 | exclude: "^docs/source/examples/"
14 | - id: fix-encoding-pragma
15 | args:
16 | - --remove
17 | - id: requirements-txt-fixer
18 | - id: trailing-whitespace
19 |
20 | - repo: https://github.com/asottile/pyupgrade
21 | rev: v3.10.1
22 | hooks:
23 | - id: pyupgrade
24 | args:
25 | - --py38-plus
26 |
27 | - repo: https://github.com/pycqa/isort
28 | rev: 5.12.0
29 | hooks:
30 | - id: isort
31 | name: isort
32 |
33 | - repo: https://github.com/psf/black
34 | rev: 23.7.0
35 | hooks:
36 | - id: black
37 | language_version: python3
38 | # args: [--line-length 79]
39 |
40 | - repo: https://github.com/pycqa/flake8
41 | rev: 6.1.0
42 | hooks:
43 | - id: flake8
44 | exclude: docs/conf.py
45 | additional_dependencies: [flake8-bugbear, flake8-print]
46 |
47 | - repo: https://github.com/mgedmin/check-manifest
48 | rev: "0.49"
49 | hooks:
50 | - id: check-manifest
51 | stages: [manual]
52 |
53 | - repo: https://github.com/nbQA-dev/nbQA
54 | rev: 1.7.0
55 | hooks:
56 | - id: nbqa-black
57 | args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells]
58 | additional_dependencies: [black==22.3.0]
59 | - id: nbqa-isort
60 | args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells]
61 | additional_dependencies: [isort==5.6.4]
62 | - id: nbqa-flake8
63 | args: [--nbqa-dont-skip-bad-cells, "--extend-ignore=E402,E203"]
64 | additional_dependencies: [flake8==3.8.3]
65 |
66 | - repo: https://github.com/pycqa/pydocstyle
67 | rev: 6.3.0
68 | hooks:
69 | - id: pydocstyle
70 | args: ["--config=setup.cfg"]
71 |
72 | # We use the Python version instead of the original version which seems to require Docker
73 | # https://github.com/koalaman/shellcheck-precommit
74 | - repo: https://github.com/shellcheck-py/shellcheck-py
75 | rev: v0.9.0.5
76 | hooks:
77 | - id: shellcheck
78 | name: shellcheck
79 |
--------------------------------------------------------------------------------
/skpro/distributions/binomial.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Binomial probability distribution."""
3 |
4 | __author__ = ["meraldoantonio"]
5 |
6 | import pandas as pd
7 | from scipy.stats import binom, rv_discrete
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class Binomial(_ScipyAdapter):
    r"""Binomial probability distribution.

    Adapter class; most methods wrap ``scipy.stats.binom``.

    The parameters are the number of trials :math:`n` and the probability
    of success :math:`p`. The probability mass function (PMF) is:

    .. math:: P(X = k) = \binom{n}{k} p^k (1-p)^{n-k}

    Parameters
    ----------
    n : int or array of int (1D or 2D), must be non-negative
        number of trials
    p : float or array of float (1D or 2D), must be in [0, 1]
        probability of success in a single trial
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.binomial import Binomial

    >>> d = Binomial(n=[[10, 10], [20, 30], [40, 50]], p=0.5)
    """

    _tags = {
        "capabilities:approx": ["pmf"],
        "capabilities:exact": ["mean", "var", "pmf", "log_pmf", "cdf", "ppf"],
        "distr:measuretype": "discrete",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, n, p, index=None, columns=None):
        self.n = n
        self.p = p

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_discrete:
        """Return the wrapped scipy distribution, ``scipy.stats.binom``."""
        return binom

    def _get_scipy_param(self):
        """Return positional (none) and keyword (``n``, ``p``) arguments for ``binom``."""
        bc_params = self._bc_params
        scipy_kwargs = {"n": bc_params["n"], "p": bc_params["p"]}
        return [], scipy_kwargs

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # array-valued number of trials
        array_case = {"n": [[10, 10], [20, 30], [40, 50]], "p": 0.5}
        # scalar parameters with explicit index and columns
        indexed_case = dict(
            n=10,
            p=0.5,
            index=pd.Index([1, 2, 5]),
            columns=pd.Index(["a", "b"]),
        )
        # plain scalar parameters
        scalar_case = {"n": 15, "p": 0.7}

        return [array_case, indexed_case, scalar_case]
74 |
--------------------------------------------------------------------------------
/skpro/distributions/left_truncated.py:
--------------------------------------------------------------------------------
1 | """Left Truncated Discrete Distribution."""
2 | from typing import Union
3 |
4 | from skpro.distributions.base import BaseDistribution
5 | from skpro.distributions.truncated import TruncatedDistribution
6 |
7 |
class LeftTruncated(TruncatedDistribution):
    r"""Distribution truncated from the left, _not_ including the lower bound.

    Convenience subclass of :class:`TruncatedDistribution` that fixes
    ``upper=None``; see that class for more details.

    Parameters
    ----------
    distribution : BaseDistribution
        The distribution to truncate from the left, _not_ including the lower bound.

    lower : float or int
        The lower bound below which values are truncated (excluded from sampling).

    index : optional
        passed on to ``TruncatedDistribution``
    columns : optional
        passed on to ``TruncatedDistribution``
    """

    def __init__(
        self,
        distribution: BaseDistribution,
        lower: Union[float, int],
        index=None,
        columns=None,
    ):
        super().__init__(
            distribution, lower=lower, upper=None, index=index, columns=columns
        )

    def _iloc(self, rowidx=None, colidx=None):
        """Return a new instance restricted to the given integer row/column indices.

        ``None`` for ``rowidx`` or ``colidx`` keeps the full index or columns.
        """
        sub_distr = self.distribution.iloc[rowidx, colidx]

        sub_index = self.index if rowidx is None else self.index[rowidx]
        sub_columns = self.columns if colidx is None else self.columns[colidx]

        return type(self)(
            distribution=sub_distr,
            lower=self.lower,
            index=sub_index,
            columns=sub_columns,
        )

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        import pandas as pd

        from skpro.distributions import NegativeBinomial

        # scalar inner distribution
        scalar_case = {
            "distribution": NegativeBinomial(mu=1.0, alpha=1.0),
            "lower": 0,
        }

        # array inner distribution, sharing index and columns with the wrapper
        row_index = pd.Index([1, 2])
        col_index = pd.Index(["a", "b"])
        inner_array = NegativeBinomial(
            mu=[[1, 2], [3, 4]], alpha=1.0, columns=col_index, index=row_index
        )
        array_case = {
            "distribution": inner_array,
            "lower": 0,
            "index": row_index,
            "columns": col_index,
        }

        return [scalar_case, array_case]
82 |
--------------------------------------------------------------------------------
/skpro/metrics/survival/tests/test_c_harrell.py:
--------------------------------------------------------------------------------
1 | """Tests for Harrell's C-index."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | import pandas as pd
5 | import pytest
6 |
7 |
@pytest.mark.parametrize("concordant", [True, False])
@pytest.mark.parametrize("pass_c", ["True", "False", "None"])
@pytest.mark.parametrize("normalization", ["overall", "index"])
def test_charrell_logic(concordant, pass_c, normalization):
    """Test the logic of the Harrell's C-index metric.

    Parameters
    ----------
    concordant : bool
        If True, the test examples are fully concordant.
        If False, the test examples are fully discordant.
    pass_c : str, one of "True", "False", "None"
        If "True", the ``C_true`` argument is passed to the metric, with censoring data.
        If "None", the ``C_true`` argument is passed to the metric, with value None.
        If "False", the ``C_true`` argument is not passed to the metric.
    normalization : str, one of "overall", "index"
        The normalization method for the metric.
    """
    from skpro.distributions import Normal
    from skpro.metrics.survival._c_harrell import ConcordanceHarrell

    # examples are constructed to be fully concordant or discordant,
    # depending on the value of `concordant`
    y_true = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 4, 3, 2]})
    c_true = pd.DataFrame({"a": [1, 0, 1, 0], "b": [0, 1, 0, 1]})
    y_pred_mean = pd.DataFrame({"a": [2, 3, 4, 5], "b": [6, 5, 4, 3]})

    if not concordant:
        # negating the predicted means reverses their order
        y_pred_mean = -y_pred_mean
    y_pred = Normal(y_pred_mean, sigma=1, columns=pd.Index(["a", "b"]))

    # evaluate the metric
    metric = ConcordanceHarrell(normalization=normalization, tie_score=int(concordant))
    metric_args = {"y_true": y_true, "y_pred": y_pred}
    if pass_c == "True":
        metric_args["C_true"] = c_true
    elif pass_c == "None":
        # explicit None must be passed here, to cover the C_true=None case
        metric_args["C_true"] = None

    res = metric(**metric_args)
    res_by_index = metric.evaluate_by_index(**metric_args)

    assert res_by_index.shape == y_true.shape

    # test assumptions
    # if concordant, the result should be 1
    # if discordant, the result should be 0
    assert res == concordant

    if normalization == "index":
        assert (res_by_index == concordant).all().all()
59 |
--------------------------------------------------------------------------------
/skpro/distributions/erlang.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Erlang probability distribution."""
3 |
4 | __author__ = ["RUPESH-KUMAR01"]
5 |
6 | import pandas as pd
7 | from scipy.stats import erlang
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class Erlang(_ScipyAdapter):
    r"""Erlang probability distribution.

    Adapter class; most methods wrap ``scipy.stats.erlang``.

    The parameters are the shape :math:`k` and the rate :math:`\lambda`.
    The probability density function (pdf) is

    .. math:: f(x) = \frac{x^{k-1}\exp\left(-\lambda x\right) \lambda^{k}}{(k-1)!}

    Parameters
    ----------
    rate : float or array of float (1D or 2D)
        Represents the rate parameter, which is also the inverse of the scale parameter.
    k : int or array of int (1D or 2D), optional, default = 1
        Represents the shape parameter.
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.erlang import Erlang

    >>> d = Erlang(rate=[[1, 1], [2, 3], [4, 5]], k=2)
    """

    _tags = {
        "capabilities:approx": ["energy", "pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, rate, k=1, index=None, columns=None):
        self.rate = rate
        self.k = k

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self):
        """Return the wrapped scipy distribution, ``scipy.stats.erlang``."""
        return erlang

    def _get_scipy_param(self):
        """Return positional (none) and keyword arguments for ``erlang``.

        The rate is inverted to obtain ``scale``; ``k`` is passed as shape ``a``.
        """
        bc_params = self._bc_params
        scale = 1 / bc_params["rate"]
        shape = bc_params["k"]

        return [], {"scale": scale, "a": shape}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # scalar parameters with explicit index and columns
        indexed_case = dict(
            rate=2.0,
            k=3,
            index=pd.Index([0, 1, 2]),
            columns=pd.Index(["x", "y"]),
        )
        # plain scalar parameters
        scalar_case_a = {"rate": 0.8, "k": 2}
        scalar_case_b = {"rate": 3.0, "k": 1}

        return [indexed_case, scalar_case_a, scalar_case_b]
77 |
--------------------------------------------------------------------------------
/skpro/regression/tests/test_cyclic_boosting.py:
--------------------------------------------------------------------------------
1 | """Tests for cyclic boosting regressor."""
2 |
3 | import pandas as pd
4 | import pytest
5 |
6 | from skpro.regression.cyclic_boosting import CyclicBoosting
7 | from skpro.tests.test_switch import run_test_for_class
8 |
9 |
@pytest.mark.skipif(
    not run_test_for_class(CyclicBoosting),
    reason="run test only if softdeps are present and incrementally (if requested)",
)
def test_cyclic_boosting_simple_use():
    """Fit a default CyclicBoosting regressor and check the prediction shape."""
    from sklearn.datasets import load_diabetes
    from sklearn.model_selection import train_test_split

    # use only the first 200 rows of the diabetes data
    n_rows = 200
    X, y = load_diabetes(return_X_y=True, as_frame=True)
    X = X.iloc[:n_rows]
    y = pd.DataFrame(y).iloc[:n_rows]
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    regressor = CyclicBoosting()
    regressor.fit(X_train, y_train)
    pred_proba = regressor.predict_proba(X_test)

    assert pred_proba.shape == y_test.shape
30 |
31 |
@pytest.mark.skipif(
    not run_test_for_class(CyclicBoosting),
    reason="run test only if softdeps are present and incrementally (if requested)",
)
def test_cyclic_boosting_with_manual_parameters():
    """Fit CyclicBoosting with explicit feature settings and check the shape."""
    from cyclic_boosting import flags
    from sklearn.datasets import load_diabetes
    from sklearn.model_selection import train_test_split

    # use only the first 200 rows of the diabetes data
    n_rows = 200
    X, y = load_diabetes(return_X_y=True, as_frame=True)
    X = X.iloc[:n_rows]
    y = pd.DataFrame(y).iloc[:n_rows]
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    unordered_cols = ("age", "sex")
    continuous_cols = ("bmi", "bp", "s1", "s2", "s3")

    # per-feature flags, keyed in the order: unordered first, then continuous
    feature_props = {
        **{col: flags.IS_UNORDERED for col in unordered_cols},
        **{col: flags.IS_CONTINUOUS for col in continuous_cols},
    }

    # all single features, plus one two-feature group
    feature_groups = [*feature_props, ("age", "sex")]

    regressor = CyclicBoosting(
        feature_groups=feature_groups,
        feature_properties=feature_props,
        maximal_iterations=5,
        alpha=0.25,
        mode="additive",
        lower=0.0,
    )
    regressor.fit(X_train, y_train)
    pred_proba = regressor.predict_proba(X_test)

    assert pred_proba.shape == y_test.shape
81 |
--------------------------------------------------------------------------------
/skpro/distributions/loggamma.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Log-Gamma probability distribution."""
3 |
4 | __author__ = ["ali-john"]
5 |
6 | import pandas as pd
7 | from scipy.stats import loggamma, rv_continuous
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class LogGamma(_ScipyAdapter):
    r"""Log-Gamma probability distribution.

    Adapter class; most methods wrap ``scipy.stats.loggamma``.

    The Log-Gamma distribution is a continuous probability distribution
    whose logarithm is related to the gamma distribution. It is useful
    in extreme value theory and reliability analysis.

    The only parameter is the shape :math:`c`, passed as ``c``.
    The probability density function (pdf) is

    .. math:: f(x) = \frac{\exp(cx - \exp(x))}{\Gamma(c)}

    where :math:`\Gamma(c)` is the Gamma function.

    Parameters
    ----------
    c : float or array of float (1D or 2D), must be positive
        shape parameter of the log-gamma distribution
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.loggamma import LogGamma

    >>> d = LogGamma(c=[[1, 2], [3, 4], [5, 6]])
    """

    _tags = {
        "capabilities:approx": ["energy", "pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, c, index=None, columns=None):
        self.c = c

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the wrapped scipy distribution, ``scipy.stats.loggamma``."""
        return loggamma

    def _get_scipy_param(self):
        """Return positional (shape ``c``) and keyword (none) arguments."""
        shape = self._bc_params["c"]
        return [shape], {}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # array-valued shape parameter
        array_case = {"c": [[1, 2], [3, 4]]}
        # scalar shape with explicit index and columns
        indexed_case = dict(
            c=2,
            index=pd.Index([1, 2, 5]),
            columns=pd.Index(["a", "b"]),
        )
        # plain scalar shape
        scalar_case = {"c": 1.5}

        return [array_case, indexed_case, scalar_case]
79 |
--------------------------------------------------------------------------------
/skpro/metrics/tests/test_distr_metrics.py:
--------------------------------------------------------------------------------
1 | """Tests for probabilistic metrics for distribution predictions."""
2 | import numpy as np
3 | import pandas as pd
4 | import pytest
5 | from skbase.testing import QuickTester
6 |
7 | from skpro.distributions import Normal
8 | from skpro.tests.test_all_estimators import BaseFixtureGenerator, PackageConfig
9 |
10 | TEST_DISTS = [Normal]
11 |
12 |
class TestAllDistrMetrics(PackageConfig, BaseFixtureGenerator, QuickTester):
    """Generic tests for all probabilistic regression metrics in the package."""

    # class variables which can be overridden by descendants
    # ------------------------------------------------------

    # which object types are generated; None=all, or scitype string
    # passed to skpro.registry.all_objects as object_type
    object_type_filter = "metric_distr"

    @pytest.mark.parametrize("dist", TEST_DISTS)
    @pytest.mark.parametrize("pass_c", [True, False])
    @pytest.mark.parametrize("multivariate", [True, False])
    @pytest.mark.parametrize("multioutput", ["raw_values", "uniform_average"])
    def test_distr_evaluate(
        self, object_instance, dist, pass_c, multivariate, multioutput
    ):
        """Test expected output of evaluate functions.

        Checks return type, columns and shape of ``evaluate_by_index`` and
        ``evaluate``, with and without a random censoring indicator ``C_true``.
        """
        metric = object_instance

        y_pred = dist.create_test_instance()
        y_true = y_pred.sample()

        m = metric.set_params(multioutput=multioutput)
        if "multivariate" in metric.get_params():
            m = m.set_params(multivariate=multivariate)

        if not multivariate:
            expected_cols = y_true.columns
        else:
            expected_cols = ["score"]

        metric_args = {"y_true": y_true, "y_pred": y_pred}
        if pass_c:
            c_true = np.random.randint(0, 2, size=y_true.shape)
            c_true = pd.DataFrame(c_true, columns=y_true.columns, index=y_true.index)
            # key is "C_true", matching the argument name used in the
            # Harrell C-index test; NOTE(review): confirm against metric signature
            metric_args["C_true"] = c_true

        res = m.evaluate_by_index(**metric_args)
        assert isinstance(res, pd.DataFrame)
        assert (res.columns == expected_cols).all()
        assert res.shape == (y_true.shape[0], len(expected_cols))

        res = m.evaluate(**metric_args)

        # a DataFrame is expected only for per-column, non-averaged output
        expect_df = not multivariate and multioutput == "raw_values"
        if expect_df:
            assert isinstance(res, pd.DataFrame)
            assert (res.columns == expected_cols).all()
            assert res.shape == (1, len(expected_cols))
        else:
            assert isinstance(res, float)
65 |
--------------------------------------------------------------------------------
/skpro/distributions/fisk.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Log-logistic aka Fisk probability distribution."""
3 |
4 | __author__ = ["fkiraly", "malikrafsan"]
5 |
6 | import pandas as pd
7 | from scipy.stats import fisk, rv_continuous
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class Fisk(_ScipyAdapter):
    r"""Fisk distribution, aka log-logistic distribution.

    Adapter class; most methods wrap ``scipy.stats.fisk``.

    The parameters are a scale :math:`\alpha` and a shape :math:`\beta`.
    The cumulative distribution function (CDF) is:

    .. math:: F(x) = \left(1 + \left(\frac{x}{\alpha}\right)^{-\beta}\right)^{-1}

    Parameters
    ----------
    alpha : float or array of float (1D or 2D), must be positive
        scale parameter of the distribution
    beta : float or array of float (1D or 2D), must be positive
        shape parameter of the distribution
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.fisk import Fisk

    >>> d = Fisk(beta=[[1, 1], [2, 3], [4, 5]], alpha=2)
    """

    _tags = {
        "capabilities:approx": ["energy", "pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, alpha=1, beta=1, index=None, columns=None):
        self.alpha = alpha
        self.beta = beta

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the wrapped scipy distribution, ``scipy.stats.fisk``."""
        return fisk

    def _get_scipy_param(self):
        """Return positional (none) and keyword arguments for ``fisk``.

        ``beta`` is passed as shape ``c``, ``alpha`` is passed as ``scale``.
        """
        bc_params = self._bc_params
        scale = bc_params["alpha"]
        shape = bc_params["beta"]

        return [], {"c": shape, "scale": scale}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # array-valued scale parameter
        array_case = {"alpha": [[1, 1], [2, 3], [4, 5]], "beta": 3}
        # scalar parameters with explicit index and columns
        indexed_case = dict(
            alpha=2,
            beta=3,
            index=pd.Index([1, 2, 5]),
            columns=pd.Index(["a", "b"]),
        )
        # plain scalar parameters
        scalar_case = {"alpha": 1.5, "beta": 2.1}

        return [array_case, indexed_case, scalar_case]
77 |
--------------------------------------------------------------------------------
/skpro/distributions/halfnormal.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Half-Normal probability distribution."""
3 |
4 | __author__ = ["SaiRevanth25"]
5 |
6 | import pandas as pd
7 | from scipy.stats import halfnorm, rv_continuous
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class HalfNormal(_ScipyAdapter):
    r"""Half-Normal distribution.

    Most methods wrap ``scipy.stats.halfnorm``.

    This distribution is univariate, without correlation between dimensions
    for the array-valued case.

    The distribution is `cut off` at :math:`x = 0`. There is no mass assigned to
    negative values; they are entirely excluded from the distribution.

    The half-normal distribution is parametrized by the standard deviation
    :math:`\sigma`, such that the pdf is

    .. math:: f(x) = \frac{\sqrt{2}}{\sigma \sqrt{\pi}}
                \exp\left(-\frac{x^2}{2\sigma^2}\right), x>0 otherwise 0

    The standard deviation :math:`\sigma` is represented by the parameter ``sigma``.

    Parameters
    ----------
    sigma : float or array of float (1D or 2D), must be positive
        standard deviation of the half-normal distribution
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.halfnormal import HalfNormal

    >>> hn = HalfNormal(sigma=1)
    """

    _tags = {
        "capabilities:approx": ["pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, sigma, index=None, columns=None):
        self.sigma = sigma

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the wrapped scipy distribution, ``scipy.stats.halfnorm``."""
        return halfnorm

    def _get_scipy_param(self):
        """Return positional (none) and keyword (``scale``) arguments.

        ``halfnorm`` has no shape parameter, so a positional argument would be
        interpreted as ``loc``; ``sigma`` must therefore be passed as ``scale``.
        """
        sigma = self._bc_params["sigma"]
        return [], {"scale": sigma}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # array case examples
        params1 = {"sigma": [[1, 2], [3, 4]]}
        params2 = {
            "sigma": 1,
            "index": pd.Index([1, 2, 5]),
            "columns": pd.Index(["a", "b"]),
        }
        # scalar case examples
        params3 = {"sigma": 2}
        return [params1, params2, params3]
78 |
--------------------------------------------------------------------------------
/skpro/distributions/inversegamma.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Inverse Gamma probability distribution."""
3 |
4 | __author__ = ["meraldoantonio"]
5 |
6 | import pandas as pd
7 | from scipy.stats import invgamma, rv_continuous
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class InverseGamma(_ScipyAdapter):
    r"""Inverse Gamma distribution.

    Most methods wrap ``scipy.stats.invgamma``.

    The Inverse Gamma distribution is parameterized by a shape
    :math:`\alpha` and a scale :math:`\beta`, such that the pdf is

    .. math::

        f(x) = \frac{\beta^{\alpha}}{\Gamma(\alpha)}
               x^{-\alpha-1} \exp\left(-\frac{\beta}{x}\right)

    where :math:`\Gamma` is the Gamma function.
    For all positive integers, :math:`\Gamma(\alpha) = (\alpha-1)!`.

    Parameters
    ----------
    alpha : float or array of float (1D or 2D)
        The shape parameter.
    beta : float or array of float (1D or 2D)
        The scale parameter.
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.inversegamma import InverseGamma

    >>> d = InverseGamma(beta=[[1, 1], [2, 3], [4, 5]], alpha=2)
    """

    _tags = {
        "capabilities:approx": ["energy", "pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, alpha, beta, index=None, columns=None):
        self.alpha = alpha
        self.beta = beta

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the wrapped scipy distribution object, ``invgamma``."""
        return invgamma

    def _get_scipy_param(self):
        """Return the scipy call arguments as a pair ``(args, kwds)``.

        The broadcast ``alpha`` becomes scipy's shape ``a``,
        the broadcast ``beta`` becomes scipy's ``scale``.
        """
        broadcast = self._bc_params
        shape, scale = broadcast["alpha"], broadcast["beta"]

        return [], {"a": shape, "scale": scale}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        return [
            # array case examples
            {"alpha": [6, 2.5], "beta": [[1, 1], [2, 3], [4, 5]]},
            {
                "alpha": 2,
                "beta": 3,
                "index": pd.Index([1, 2, 5]),
                "columns": pd.Index(["a", "b"]),
            },
            # scalar case example
            {"alpha": 1.5, "beta": 2.1},
        ]
80 |
--------------------------------------------------------------------------------
/skpro/distributions/alpha.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Alpha probability distribution."""
3 |
4 | __author__ = ["SaiReavanth25"]
5 |
6 | import pandas as pd
7 | from scipy.stats import alpha, rv_continuous
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class Alpha(_ScipyAdapter):
    r"""Alpha distribution.

    Most methods wrap ``scipy.stats.alpha``.

    The alpha distribution is characterized by its shape parameter :math:`a`,
    which determines its skewness and tail behavior.
    It is often used for modeling data with heavy right tails,
    unlike the Gaussian distribution (which is symmetric and bell-shaped).

    The probability density function (PDF) of the Alpha distribution is given by:

    .. math::

        f(x) = \frac{1}{x^2 \Phi(a) \sqrt{2\pi}}
               \exp\left(-\frac{1}{2}\left(a - \frac{1}{x}\right)^2\right)

    where:

    - :math:`a` is the shape parameter.
    - :math:`\Phi` is the cumulative distribution function (CDF) of the
      standard normal distribution.

    Parameters
    ----------
    a : float or array of float (1D or 2D), must be positive
        Shape parameter controlling skewness and tail behavior.
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions import Alpha

    >>> distr = Alpha(a=[[1, 2], [3, 4]])
    """

    _tags = {
        "capabilities:approx": ["pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, a, index=None, columns=None):
        self.a = a

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the wrapped scipy distribution object, ``alpha``."""
        return alpha

    def _get_scipy_param(self):
        """Return the scipy call arguments as a pair ``(args, kwds)``.

        The broadcast ``a`` is the single positional (shape) argument.
        """
        return [self._bc_params["a"]], {}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        return [
            # array case examples
            {"a": [[2, 3], [4, 5]]},
            {
                "a": 3,
                "index": pd.Index([1, 2, 3]),
                "columns": pd.Index(["a", "b"]),
            },
            # scalar case example
            {"a": 2.5},
        ]
81 |
--------------------------------------------------------------------------------
/skpro/distributions/loglaplace.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Log-Laplace probability distribution."""
3 |
4 | __author__ = ["SaiRevanth25"]
5 |
6 | import pandas as pd
7 | from scipy.stats import loglaplace, rv_continuous
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class LogLaplace(_ScipyAdapter):
    r"""Log-Laplace distribution.

    Most methods wrap ``scipy.stats.loglaplace``.

    This distribution is univariate, without correlation between dimensions
    for the array-valued case.

    The log-Laplace distribution is the continuous probability distribution of
    a random variable whose logarithm follows a Laplace distribution.
    It is commonly used in finance and hydrology due to its heavy tails
    and asymmetry.

    The log-Laplace distribution is parametrized by the parameter
    :math:`c`, such that the pdf is

    .. math:: f(x) = \frac{c}{2} x^{c-1}, \quad 0 < x < 1

    and

    .. math:: f(x) = \frac{c}{2} x^{-c-1}, \quad x \geq 1

    The parameter :math:`c` is represented by the parameter ``scale``;
    it is passed to ``scipy.stats.loglaplace`` as its first positional argument.

    Parameters
    ----------
    scale : float or array of float (1D or 2D), must be positive
        parameter :math:`c` of the log-Laplace distribution
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.loglaplace import LogLaplace

    >>> ll = LogLaplace(scale=1)
    """

    _tags = {
        "capabilities:approx": ["pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, scale, index=None, columns=None):
        self.scale = scale

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the wrapped scipy distribution object, ``loglaplace``."""
        return loglaplace

    def _get_scipy_param(self):
        """Return the scipy call arguments as a pair ``(args, kwds)``.

        The broadcast ``scale`` is the single positional argument.
        """
        return [self._bc_params["scale"]], {}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        return [
            # array case examples
            {"scale": [[1, 2], [3, 4]]},
            {
                "scale": 1,
                "index": pd.Index([1, 2, 5]),
                "columns": pd.Index(["a", "b"]),
            },
            # scalar case example
            {"scale": 2},
        ]
82 |
--------------------------------------------------------------------------------
/skpro/distributions/halfcauchy.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Half-Cauchy probability distribution."""
3 |
4 | __author__ = ["SaiRevanth25"]
5 |
6 | import pandas as pd
7 | from scipy.stats import halfcauchy, rv_continuous
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class HalfCauchy(_ScipyAdapter):
    r"""Half-Cauchy distribution.

    Most methods wrap ``scipy.stats.halfcauchy``.

    This distribution is univariate, without correlation between dimensions
    for the array-valued case.

    The half-Cauchy distribution is a continuous probability distribution that
    is the positive half of the Cauchy distribution. It is commonly used in
    Bayesian statistics, especially as a prior distribution for scale parameters
    due to its heavy tails and non-negativity.

    The half-Cauchy distribution is parametrized by the scale parameter
    :math:`\beta`, such that the pdf is

    .. math::

        f(x) = \frac{2}{\pi \beta \left(1 + \left(\frac{x}{\beta}\right)^2\right)},
        \quad x > 0

    and :math:`f(x) = 0` otherwise.

    The scale parameter :math:`\beta` is represented by the parameter ``beta``.

    Parameters
    ----------
    beta : float or array of float (1D or 2D), must be positive
        scale parameter of the half-Cauchy distribution
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.halfcauchy import HalfCauchy

    >>> hc = HalfCauchy(beta=1)
    """

    _tags = {
        "capabilities:approx": ["pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, beta, index=None, columns=None):
        self.beta = beta

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the wrapped scipy distribution object, ``halfcauchy``."""
        return halfcauchy

    def _get_scipy_param(self):
        """Return the scipy call arguments as a pair ``(args, kwds)``.

        ``scipy.stats.halfcauchy`` has no shape parameters, so a positional
        argument would be interpreted as ``loc`` and shift the distribution.
        ``beta`` is therefore passed explicitly as ``scale``.
        """
        beta = self._bc_params["beta"]
        return [], {"scale": beta}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # array case examples
        params1 = {"beta": [[1, 2], [3, 4]]}
        params2 = {
            "beta": 1,
            "index": pd.Index([1, 2, 5]),
            "columns": pd.Index(["a", "b"]),
        }
        # scalar case examples
        params3 = {"beta": 2}
        return [params1, params2, params3]
82 |
--------------------------------------------------------------------------------
/skpro/survival/base.py:
--------------------------------------------------------------------------------
1 | """Base class for probabilistic survival regression."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | from skpro.regression.base import BaseProbaRegressor
5 |
# allowed input mtypes
# NOTE(review): not referenced by the code visible in this module;
# presumably consumed by input checks elsewhere - confirm before changing
ALLOWED_MTYPES = [
    "pd_DataFrame_Table",
    "pd_Series_Table",
    "numpy1D",
    "numpy2D",
]
13 |
14 |
class BaseSurvReg(BaseProbaRegressor):
    """Base class for survival regression models.

    Contains no additional logic, only docstring overrides:
    ``fit`` and ``update`` forward ``X``, ``y`` and ``C`` unchanged to the
    parent class and return ``self``.
    """

    # tag set on this class: marks it as capable of survival regression
    _tags = {"capability:survival": True}

    def fit(self, X, y, C=None):
        """Fit regressor to training data.

        Writes to self:
            Sets fitted model attributes ending in "_".

        Changes state to "fitted" = sets is_fitted flag to True

        Parameters
        ----------
        X : pandas DataFrame
            feature instances to fit regressor to
        y : pd.DataFrame, must be same length as X
            labels to fit regressor to
        C : pd.DataFrame, optional (default=None)
            censoring information for survival analysis,

            * should have same column name as y, same length as X and y
            * should have entries 0 and 1 (float or int),
              0 = uncensored, 1 = (right) censored

            if None, all observations are assumed to be uncensored.

        Returns
        -------
        self : reference to self
        """
        # all fitting logic lives in the parent class
        super().fit(X=X, y=y, C=C)
        return self

    def update(self, X, y, C=None):
        """Update regressor with a new batch of training data.

        Only estimators with the ``capability:update`` tag (value ``True``)
        provide this method, otherwise the method ignores the call and
        discards the data passed.

        State required:
            Requires state to be "fitted".

        Writes to self:
            Updates fitted model attributes ending in "_".

        Parameters
        ----------
        X : pandas DataFrame
            feature instances to update regressor with
        y : pd.DataFrame, must be same length as X
            labels to update regressor with
        C : pd.DataFrame, optional (default=None)
            censoring information for survival analysis,

            * should have same column name as y, same length as X and y
            * should have entries 0 and 1 (float or int),
              0 = uncensored, 1 = (right) censored

            if None, all observations are assumed to be uncensored.

        Returns
        -------
        self : reference to self
        """
        # all update logic lives in the parent class
        super().update(X=X, y=y, C=C)
        return self
85 |
--------------------------------------------------------------------------------
/skpro/distributions/halflogistic.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Half-Logistic probability distribution."""
3 |
4 | __author__ = ["SaiRevanth25"]
5 |
6 | import pandas as pd
7 | from scipy.stats import halflogistic, rv_continuous
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class HalfLogistic(_ScipyAdapter):
    r"""Half-Logistic distribution.

    Most methods wrap ``scipy.stats.halflogistic``.

    This distribution is univariate, without correlation between dimensions
    for the array-valued case.

    The half-logistic distribution is a continuous probability distribution derived
    from the logistic distribution by taking only the positive half. It is particularly
    useful in reliability analysis, lifetime modeling, and other applications where
    non-negative values are required.

    The half-logistic distribution is parametrized by the scale parameter
    :math:`\beta`, such that the pdf is

    .. math::

        f(x) = \frac{2 \exp\left(-\frac{x}{\beta}\right)}
        {\beta \left(1 + \exp\left(-\frac{x}{\beta}\right)\right)^2},
        \quad x > 0

    and :math:`f(x) = 0` otherwise.

    The scale parameter :math:`\beta` is represented by the parameter ``beta``.

    Parameters
    ----------
    beta : float or array of float (1D or 2D), must be positive
        scale parameter of the half-logistic distribution
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.halflogistic import HalfLogistic

    >>> hl = HalfLogistic(beta=1)
    """

    _tags = {
        "capabilities:approx": ["pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, beta, index=None, columns=None):
        self.beta = beta

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the wrapped scipy distribution object, ``halflogistic``."""
        return halflogistic

    def _get_scipy_param(self):
        """Return the scipy call arguments as a pair ``(args, kwds)``.

        ``scipy.stats.halflogistic`` has no shape parameters, so a positional
        argument would be interpreted as ``loc`` and shift the distribution.
        ``beta`` is therefore passed explicitly as ``scale``.
        """
        beta = self._bc_params["beta"]
        return [], {"scale": beta}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # array case examples
        params1 = {"beta": [[1, 2], [3, 4]]}
        params2 = {
            "beta": 1,
            "index": pd.Index([1, 2, 5]),
            "columns": pd.Index(["a", "b"]),
        }
        # scalar case examples
        params3 = {"beta": 2}
        return [params1, params2, params3]
83 |
--------------------------------------------------------------------------------
/skpro/utils/retrieval.py:
--------------------------------------------------------------------------------
1 | """Utility functions for retrieving objects from modules."""
2 | import importlib
3 | import inspect
4 | import pkgutil
5 | from functools import lru_cache
6 |
# prefixes of module names skipped by ``_all_cond``: a submodule is ignored
# when the last component of its dotted name starts with any of these strings
EXCLUDE_MODULES_STARTING_WITH = ("all", "test", "contrib")
8 |
9 |
def _all_functions(module_name):
    """Collect all functions defined in the submodules of a package.

    Submodules whose name starts with one of the prefixes in
    ``EXCLUDE_MODULES_STARTING_WITH`` ('all', 'test', 'contrib') are skipped.

    Parameters
    ----------
    module_name : str
        Name of the module.

    Returns
    -------
    functions_list : list
        List of tuples (function_name: str, function_object: function).
    """
    cached = _all_cond(module_name, inspect.isfunction)
    # return a fresh list so that callers cannot modify the cached result
    return list(cached)
27 |
28 |
def _all_classes(module_name):
    """Collect all classes defined in the submodules of a package.

    Submodules whose name starts with one of the prefixes in
    ``EXCLUDE_MODULES_STARTING_WITH`` ('all', 'test', 'contrib') are skipped.

    Parameters
    ----------
    module_name : str
        Name of the module.

    Returns
    -------
    classes_list : list
        List of tuples (class_name: str, class_ref: class).
    """
    cached = _all_cond(module_name, inspect.isclass)
    # return a fresh list so that callers cannot modify the cached result
    return list(cached)
46 |
47 |
@lru_cache
def _all_cond(module_name, cond):
    """Get all objects from a package's submodules satisfying a condition.

    The condition should be a hashable callable,
    of signature ``condition(obj) -> bool``.

    Submodules whose name starts with one of the prefixes in
    ``EXCLUDE_MODULES_STARTING_WITH`` ('all', 'test', 'contrib') are skipped.

    Only objects defined in the inspected submodule itself are returned:
    objects whose ``__module__`` differs from the submodule name are skipped,
    and so are objects that have no ``__module__`` attribute at all, since
    their origin cannot be determined.

    The result is cached via ``lru_cache``; the returned list is the cached
    object itself, so callers should copy it before modifying it.

    Parameters
    ----------
    module_name : str
        Name of the package; it must have a ``__path__`` attribute.
    cond : callable
        Condition to satisfy.
        Signature: ``condition(obj) -> bool``,
        passed as predicate to ``inspect.getmembers``.

    Returns
    -------
    obj_list : list
        List of tuples (object_name, object).
    """
    # Import the package
    package = importlib.import_module(module_name)

    # Only the first entry of the package path is searched
    package_path = package.__path__[0]
    prefix = package.__name__ + "."

    # Accumulates all (name, object) pairs found
    obj_list = []

    # Walk through the package's modules
    for _, modname, _ in pkgutil.walk_packages(path=[package_path], prefix=prefix):
        # Skip modules whose last name component has an excluded prefix
        if modname.split(".")[-1].startswith(EXCLUDE_MODULES_STARTING_WITH):
            continue

        # Import the module
        module = importlib.import_module(modname)

        # Get all objects from the module
        for name, obj in inspect.getmembers(module, cond):
            # skip objects imported from another module; getattr guards against
            # members without ``__module__`` (e.g., plain numbers or strings),
            # which would otherwise raise AttributeError
            if getattr(obj, "__module__", None) != module.__name__:
                continue
            # add the object to the list
            obj_list.append((name, obj))

    return obj_list
98 |
--------------------------------------------------------------------------------
/skpro/distributions/inversegaussian.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Inverse Gaussian probability distribution."""
3 |
4 | __author__ = ["Omswastik-11"]
5 |
6 | import pandas as pd
7 | from scipy.stats import invgauss, rv_continuous
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class InverseGaussian(_ScipyAdapter):
    r"""Inverse Gaussian distribution, aka Wald distribution.

    Most methods wrap ``scipy.stats.invgauss``.

    The parameterization follows SciPy: the distribution is specified by a
    shape parameter ``mu`` and a ``scale`` parameter, which are the
    positional and keyword parameters of
    ``scipy.stats.invgauss(mu, scale=scale)``.
    The mean of the distribution is given by ``mean = mu * scale``.

    The pdf in terms of :math:`\mu` = ``mu`` and :math:`\sigma` = ``scale`` is:

    .. math:: f(x; \mu, \sigma) = \sqrt{\frac{\sigma}{2 \pi x^3}}
              \exp\left(-\frac{(x - \mu \sigma)^2}{2 \mu^2 \sigma x}\right)

    Parameters
    ----------
    mu : float or array of float (1D or 2D), must be positive
        shape parameter (dimensionless)
    scale : float or array of float (1D or 2D), must be positive
        scale parameter (multiplies the distribution)
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.inversegaussian import InverseGaussian

    >>> d = InverseGaussian(mu=1.0, scale=1.0)
    """

    _tags = {
        "capabilities:approx": ["energy", "pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, mu, scale, index=None, columns=None):
        self.mu = mu
        self.scale = scale

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the wrapped scipy distribution object, ``invgauss``."""
        return invgauss

    def _get_scipy_param(self):
        """Return the scipy call arguments as a pair ``(args, kwds)``.

        The broadcast ``mu`` is the positional shape argument,
        the broadcast ``scale`` is passed under the keyword ``scale``.
        """
        broadcast = self._bc_params
        shape, scale = broadcast["mu"], broadcast["scale"]

        return [shape], {"scale": scale}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        return [
            # array case examples
            {"mu": [2, 3.5], "scale": [[1, 1], [2, 3], [4, 5]]},
            {
                "mu": 2.5,
                "scale": 1.5,
                "index": pd.Index([1, 2, 5]),
                "columns": pd.Index(["a", "b"]),
            },
            # scalar case example
            {"mu": 3.0, "scale": 2.0},
        ]
84 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | Examples of behavior that contributes to creating a positive environment include:
10 |
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 |
17 | Examples of unacceptable behavior by participants include:
18 |
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 |
25 | ## Our Responsibilities
26 |
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 |
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | ## Scope
32 |
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 |
35 | ## Enforcement
36 |
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at f.kiraly@ucl.ac.uk. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 |
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 |
41 | ## Attribution
42 |
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
44 |
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/
47 |
--------------------------------------------------------------------------------
/skpro/distributions/__init__.py:
--------------------------------------------------------------------------------
1 | """Probability distribution objects."""
2 |
3 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
4 | # adapted from sktime
5 |
# names exported by ``from skpro.distributions import *``;
# each entry corresponds to a name imported further below in this module
__all__ = [
    "Alpha",
    "Beta",
    "Binomial",
    "ChiSquared",
    "Delta",
    "Empirical",
    "Erlang",
    "Exponential",
    "Fisk",
    "Gamma",
    "LogGamma",
    "Geometric",
    "HalfCauchy",
    "HalfLogistic",
    "HalfNormal",
    "Hurdle",
    "IID",
    "InverseGamma",
    "InverseGaussian",
    "Histogram",
    "Laplace",
    "LeftTruncated",
    "Logistic",
    "LogLaplace",
    "LogNormal",
    "MeanScale",
    "Mixture",
    "NegativeBinomial",
    "Normal",
    "Pareto",
    "Poisson",
    "QPD_Empirical",
    "QPD_S",
    "QPD_B",
    "QPD_U",
    "QPD_Johnson",
    "SkewNormal",
    "TDistribution",
    "TransformedDistribution",
    "TruncatedDistribution",
    "TruncatedNormal",
    "Uniform",
    "Weibull",
]
51 |
52 | from skpro.distributions.alpha import Alpha
53 | from skpro.distributions.beta import Beta
54 | from skpro.distributions.binomial import Binomial
55 | from skpro.distributions.chi_squared import ChiSquared
56 | from skpro.distributions.compose import IID
57 | from skpro.distributions.delta import Delta
58 | from skpro.distributions.empirical import Empirical
59 | from skpro.distributions.erlang import Erlang
60 | from skpro.distributions.exponential import Exponential
61 | from skpro.distributions.fisk import Fisk
62 | from skpro.distributions.gamma import Gamma
63 | from skpro.distributions.geometric import Geometric
64 | from skpro.distributions.halfcauchy import HalfCauchy
65 | from skpro.distributions.halflogistic import HalfLogistic
66 | from skpro.distributions.halfnormal import HalfNormal
67 | from skpro.distributions.histogram import Histogram
68 | from skpro.distributions.hurdle import Hurdle
69 | from skpro.distributions.inversegamma import InverseGamma
70 | from skpro.distributions.inversegaussian import InverseGaussian
71 | from skpro.distributions.laplace import Laplace
72 | from skpro.distributions.left_truncated import LeftTruncated
73 | from skpro.distributions.loggamma import LogGamma
74 | from skpro.distributions.logistic import Logistic
75 | from skpro.distributions.loglaplace import LogLaplace
76 | from skpro.distributions.lognormal import LogNormal
77 | from skpro.distributions.meanscale import MeanScale
78 | from skpro.distributions.mixture import Mixture
79 | from skpro.distributions.negative_binomial import NegativeBinomial
80 | from skpro.distributions.normal import Normal
81 | from skpro.distributions.pareto import Pareto
82 | from skpro.distributions.poisson import Poisson
83 | from skpro.distributions.qpd import QPD_B, QPD_S, QPD_U, QPD_Johnson
84 | from skpro.distributions.qpd_empirical import QPD_Empirical
85 | from skpro.distributions.skew_normal import SkewNormal
86 | from skpro.distributions.t import TDistribution
87 | from skpro.distributions.trafo import TransformedDistribution
88 | from skpro.distributions.truncated import TruncatedDistribution
89 | from skpro.distributions.truncated_normal import TruncatedNormal
90 | from skpro.distributions.uniform import Uniform
91 | from skpro.distributions.weibull import Weibull
92 |
--------------------------------------------------------------------------------
/skpro/datatypes/_convert_utils/_convert.py:
--------------------------------------------------------------------------------
1 | """Conversion utilities for mtypes."""
2 |
3 | __author__ = ["fkiraly"]
4 |
5 |
def _concat(fun1, fun2):
    """Chain two converter functions so that both use the same store.

    Parameters
    ----------
    fun1, fun2 : functions in converter signature, see datatypes._convert

    Returns
    -------
    function in converter signature, see datatypes._convert
        the composition fun2 o fun1: ``fun1`` is applied first, its result is
        passed to ``fun2``, and both receive the same ``store`` argument
    """

    def concat_fun(obj, store=None):
        intermediate = fun1(obj, store=store)
        return fun2(intermediate, store=store)

    return concat_fun
25 |
26 |
def _extend_conversions(mtype, anchor_mtype, convert_dict, mtype_universe=None):
    """Obtain all conversions from and to mtype via conversion to anchor_mtype.

    Mutates convert_dict by adding all conversions from and to mtype.

    Assumes:
        convert_dict contains
        * conversion from `mtype` to `anchor_mtype`
        * conversion from `anchor_mtype` to `mtype`
        * conversions from `anchor_mtype` to all mtypes in `mtype_universe`
        * conversions from all mtypes in `mtype_universe` to `anchor_mtype`

    Guarantees:
        convert_dict contains
        * conversions from `mtype` to all mtypes in mtype_universe
        * conversions from all mtypes in mtype_universe to `mtype`

        conversions from `mtype` to `tp` not in convert_dict at start
        are filled in as
            _concat(<mtype to anchor_mtype>, <anchor_mtype to tp>)
        conversions from `tp` to `mtype` not in convert_dict at start
        are filled in as
            _concat(<tp to anchor_mtype>, <anchor_mtype to mtype>)

    The scitype (third key entry) is read from the first key of convert_dict.

    Parameters
    ----------
    mtype : mtype string in convert_dict
    anchor_mtype : mtype string in convert_dict
    convert_dict : conversion dictionary with entries of converter signature
        see docstring of datatypes._convert
        keys are tuples (from_mtype, to_mtype, scitype)
    mtype_universe : iterable of mtype strings in convert_dict, coercible to list or set
        if None, all mtypes occurring in the keys of convert_dict are used

    Returns
    -------
    reference to convert_dict
        CAVEAT: convert_dict passed to this function gets mutated, this is a reference
    """
    initial_keys = list(convert_dict.keys())
    scitype = initial_keys[0][2]

    if mtype_universe is None:
        # default universe: every mtype appearing as target or source of a key
        mtype_universe = {key[1] for key in initial_keys}
        mtype_universe = mtype_universe.union([key[0] for key in initial_keys])

    for tp in set(mtype_universe).difference([mtype, anchor_mtype]):
        # mtype -> tp, routed through the anchor
        to_tp = (mtype, tp, scitype)
        if (anchor_mtype, tp, scitype) in convert_dict and to_tp not in convert_dict:
            convert_dict[to_tp] = _concat(
                convert_dict[(mtype, anchor_mtype, scitype)],
                convert_dict[(anchor_mtype, tp, scitype)],
            )

        # tp -> mtype, routed through the anchor
        from_tp = (tp, mtype, scitype)
        if (tp, anchor_mtype, scitype) in convert_dict and from_tp not in convert_dict:
            convert_dict[from_tp] = _concat(
                convert_dict[(tp, anchor_mtype, scitype)],
                convert_dict[(anchor_mtype, mtype, scitype)],
            )

    return convert_dict
84 |
--------------------------------------------------------------------------------
/skpro/utils/_maint/_show_versions.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3 -u
2 | # License: BSD 3 clause
3 | """Utility methods to print system info for debugging.
4 |
5 | adapted from
6 | :func: `sklearn.show_versions`
7 | """
8 |
9 | __author__ = ["mloning", "fkiraly"]
10 | __all__ = ["show_versions"]
11 |
12 | import importlib
13 | import platform
14 | import sys
15 |
16 |
17 | def _get_sys_info():
18 | """System information.
19 |
20 | Return
21 | ------
22 | sys_info : dict
23 | system and Python version information
24 | """
25 | python = sys.version.replace("\n", " ")
26 |
27 | blob = [
28 | ("python", python),
29 | ("executable", sys.executable),
30 | ("machine", platform.platform()),
31 | ]
32 |
33 | return dict(blob)
34 |
35 |
# dependencies to print versions of, by default
# entries are import names, as passed to importlib.import_module
# "skpro" is included so that the package's own version is part of the report
DEFAULT_DEPS_TO_SHOW = [
    "pip",
    "skpro",
    "sktime",
    "sklearn",
    "skbase",
    "numpy",
    "scipy",
    "pandas",
    "matplotlib",
    "joblib",
    "numba",
    "statsmodels",
    "pmdarima",
    "statsforecast",
    "tsfresh",
    "tslearn",
    "torch",
    "tensorflow",
    "tensorflow_probability",
]
57 |
58 |
59 | def _get_deps_info(deps=None):
60 | """Overview of the installed version of main dependencies.
61 |
62 | Parameters
63 | ----------
64 | deps : optional, list of strings with import names
65 | if None, behaves as deps = ["sktime"]
66 |
67 | Returns
68 | -------
69 | deps_info: dict
70 | version information on libraries in `deps`
71 | keys are import names, values are PEP 440 version strings
72 | of the import as present in the current python environment
73 | """
74 | if deps is None:
75 | deps = ["sktime"]
76 |
77 | def get_version(module):
78 | return getattr(module, "__version__", None)
79 |
80 | deps_info = {}
81 |
82 | for modname in deps:
83 | try:
84 | if modname in sys.modules:
85 | mod = sys.modules[modname]
86 | else:
87 | mod = importlib.import_module(modname)
88 | except ImportError:
89 | deps_info[modname] = None
90 | else:
91 | ver = get_version(mod)
92 | deps_info[modname] = ver
93 |
94 | return deps_info
95 |
96 |
def show_versions():
    """Print python version, OS version, sktime version, selected dependency versions.

    Pretty prints, to standard output:

    * python version of environment
    * python executable location
    * OS version
    * list of import name and version number for selected python dependencies

    Developer note:
    Python version/executable and OS version are from `_get_sys_info`
    Package versions are retrieved by `_get_deps_info`
    Selected dependencies are as in the DEFAULT_DEPS_TO_SHOW variable
    """
    # gather both reports first, so that nothing is printed before lookups finish
    system_report = _get_sys_info()
    dependency_report = _get_deps_info(deps=DEFAULT_DEPS_TO_SHOW)

    print("\nSystem:")  # noqa: T001, T201
    for k, stat in system_report.items():
        # keys are right-aligned to width 10
        print(f"{k:>10}: {stat}")  # noqa: T001, T201

    print("\nPython dependencies:")  # noqa: T001, T201
    for k, stat in dependency_report.items():
        # import names are right-aligned to width 13
        print(f"{k:>13}: {stat}")  # noqa: T001, T201
122 |
--------------------------------------------------------------------------------
/.github/workflows/wheels.yml:
--------------------------------------------------------------------------------
1 | name: Build wheels and publish to PyPI
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | check_tag:
9 | name: Check tag
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - uses: actions/checkout@v6
14 |
15 | - uses: actions/setup-python@v6
16 | with:
17 | python-version: '3.11'
18 |
19 | - shell: bash
20 | run: |
21 | TAG="${{ github.event.release.tag_name }}"
22 | GH_TAG_NAME="${TAG#v}"
23 | PY_VERSION=$(python - <<'PY'
24 | import pathlib, tomllib
25 | data = tomllib.loads(pathlib.Path("pyproject.toml").read_text(encoding="utf-8"))
26 | print(data.get("project").get("version"))
27 | PY
28 | )
29 | if [ "${GH_TAG_NAME}" != "${PY_VERSION}" ]; then
30 | echo "::error::Tag (${GH_TAG_NAME}) does not match pyproject.toml version (${PY_VERSION})."
31 | exit 2
32 | fi
33 |
34 | build_wheels:
35 | name: Build wheels
36 | runs-on: ubuntu-latest
37 |
38 | steps:
39 | - uses: actions/checkout@v6
40 |
41 | - uses: actions/setup-python@v6
42 | with:
43 | python-version: '3.11'
44 |
45 | - name: Build wheel
46 | run: |
47 | python -m pip install build
48 | python -m build --wheel --sdist --outdir wheelhouse
49 |
50 | - name: Store wheels
51 | uses: actions/upload-artifact@v6
52 | with:
53 | name: wheels
54 | path: wheelhouse/*
55 |
56 | test_wheels:
57 | needs: build_wheels
58 | name: Test wheels on ${{ matrix.os }} with ${{ matrix.python-version }}
59 | runs-on: ${{ matrix.os }}
60 | strategy:
61 | fail-fast: false # to not fail all combinations if just one fail
62 | matrix:
63 | os: [windows-latest, ubuntu-latest, macOS-latest]
64 | python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
65 |
66 | steps:
67 | - uses: actions/checkout@v6
68 | - uses: actions/setup-python@v6
69 | with:
70 | python-version: ${{ matrix.python-version }}
71 |
72 | - uses: actions/download-artifact@v7
73 | with:
74 | name: wheels
75 | path: wheelhouse
76 |
77 | # Set wheel filename differently for Unix vs Windows
78 | - name: Get wheel filename (Unix)
79 | if: runner.os != 'Windows'
80 | run: echo "WHEELNAME=$(ls ./wheelhouse/skpro-*none-any.whl)" >> $GITHUB_ENV
81 |
82 | - name: Get wheel filename (Windows)
83 | if: runner.os == 'Windows'
84 | run: echo "WHEELNAME=$(ls ./wheelhouse/skpro-*none-any.whl)" >> $env:GITHUB_ENV
85 |
86 | - name: Install wheel and extras
87 | run: python -m pip install "${{ env.WHEELNAME }}[all_extras,dev]"
88 |
89 | - name: Run tests
90 | run: |
91 | python -m pytest
92 |
93 | upload_wheels:
94 | name: Upload wheels to PyPI
95 | runs-on: ubuntu-latest
96 | needs: [build_wheels,test_wheels]
97 |
98 | permissions:
99 | id-token: write
100 |
101 | steps:
102 | - uses: actions/download-artifact@v7
103 | with:
104 | name: wheels
105 | path: wheelhouse
106 |
107 | - name: Publish package to PyPI
108 | uses: pypa/gh-action-pypi-publish@release/v1
109 | with:
110 | packages-dir: wheelhouse/
111 |
--------------------------------------------------------------------------------
/docs/source/api_reference/distributions.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _distributions_ref:
3 |
4 | Probability distributions
5 | =========================
6 |
The :mod:`skpro.distributions` module contains
8 | probability distributions which combine a ``pandas.DataFrame``-like API
9 | with a ``scikit-base`` compatible object interface.
10 |
11 | All distributions in ``skpro`` can be listed using the ``skpro.registry.all_objects`` utility,
12 | using ``object_types="distribution"``, optionally filtered by tags.
Valid tags can be listed using ``skpro.registry.all_tags``.
14 |
15 | Base
16 | ----
17 |
18 | .. currentmodule:: skpro.distributions.base
19 |
20 | .. autosummary::
21 | :toctree: auto_generated/
22 | :template: class.rst
23 |
24 | BaseDistribution
25 |
26 | Parametric distributions
27 | ------------------------
28 |
29 | Continuous support - full reals
30 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
31 |
32 | .. currentmodule:: skpro.distributions
33 |
34 | .. autosummary::
35 | :toctree: auto_generated/
36 | :template: class.rst
37 |
38 | Laplace
39 | Logistic
40 | Normal
41 | SkewNormal
42 | TDistribution
43 | TruncatedNormal
44 |
45 |
46 | Continuous support - non-negative reals
47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
48 |
49 | .. currentmodule:: skpro.distributions
50 |
51 | .. autosummary::
52 | :toctree: auto_generated/
53 | :template: class.rst
54 |
55 | Alpha
56 | Beta
57 | ChiSquared
58 | Exponential
59 | Erlang
60 | Fisk
61 | Gamma
62 | LogGamma
63 | HalfCauchy
64 | HalfLogistic
65 | HalfNormal
66 | InverseGamma
67 | InverseGaussian
68 | LogLaplace
69 | Pareto
70 | Weibull
71 |
72 |
73 | Integer support
74 | ~~~~~~~~~~~~~~~
75 |
76 | .. currentmodule:: skpro.distributions
77 |
78 | .. autosummary::
79 | :toctree: auto_generated/
80 | :template: class.rst
81 |
82 | Binomial
83 | Geometric
84 | Hurdle
85 | NegativeBinomial
86 | Poisson
87 |
88 | Non-parametric and empirical distributions
89 | ------------------------------------------
90 |
91 | .. currentmodule:: skpro.distributions
92 |
93 | .. autosummary::
94 | :toctree: auto_generated/
95 | :template: class.rst
96 |
97 | Delta
98 | Empirical
99 | Histogram
100 | QPD_Empirical
101 | QPD_Johnson
102 | QPD_U
103 | QPD_S
104 | QPD_B
105 |
106 |
107 | Composite distributions
108 | -----------------------
109 |
110 | Parametric families
111 | ~~~~~~~~~~~~~~~~~~~
112 |
113 | .. currentmodule:: skpro.distributions
114 |
115 | .. autosummary::
116 | :toctree: auto_generated/
117 | :template: class.rst
118 |
119 | MeanScale
120 | TruncatedDistribution
121 | LeftTruncated
122 |
123 | Mixture composition
124 | ~~~~~~~~~~~~~~~~~~~
125 |
126 | .. currentmodule:: skpro.distributions
127 |
128 | .. autosummary::
129 | :toctree: auto_generated/
130 | :template: class.rst
131 |
132 | Mixture
133 |
134 | Transformation composition
135 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
136 |
137 | .. currentmodule:: skpro.distributions
138 |
139 | .. autosummary::
140 | :toctree: auto_generated/
141 | :template: class.rst
142 |
143 | TransformedDistribution
144 |
145 | Sampling and multivariate composition
146 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
147 |
148 | .. currentmodule:: skpro.distributions
149 |
150 | .. autosummary::
151 | :toctree: auto_generated/
152 | :template: class.rst
153 |
154 | IID
155 |
--------------------------------------------------------------------------------
/skpro/distributions/exponential.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Exponential probability distribution."""
3 |
4 | __author__ = ["ShreeshaM07"]
5 |
6 | import numpy as np
7 | import pandas as pd
8 | from scipy.stats import expon, rv_continuous
9 |
10 | from skpro.distributions.adapters.scipy import _ScipyAdapter
11 |
12 |
class Exponential(_ScipyAdapter):
    r"""Exponential Distribution.

    Most methods wrap ``scipy.stats.expon``.

    The Exponential distribution is parametrized by the rate :math:`\lambda`,
    such that the pdf is

    .. math:: f(x) = \lambda \exp\left(-\lambda x\right)

    for :math:`x \geq 0`, and zero otherwise.

    The rate :math:`\lambda` is represented by the parameter ``rate``.

    Parameters
    ----------
    rate : float or array of float (1D or 2D)
        rate of the distribution
        rate = 1/scale
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.exponential import Exponential
    >>> d = Exponential(rate=2)
    """

    _tags = {
        "capabilities:approx": ["ppf", "pdfnorm"],
        "capabilities:exact": [
            "mean",
            "var",
            "pdf",
            "log_pdf",
            "cdf",
            "energy",
        ],
        "distr:measuretype": "continuous",
        "broadcast_init": "on",
    }

    def __init__(self, rate, index=None, columns=None):
        self.rate = rate

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the scipy distribution object wrapped by this class."""
        return expon

    def _get_scipy_param(self):
        """Return scipy positional and keyword arguments for ``expon``.

        ``scipy.stats.expon`` is parametrized by ``scale``, which is the
        reciprocal of the rate.
        """
        rate = self._bc_params["rate"]
        scale = 1 / rate
        return [], {"scale": scale}

    def _energy_self(self):
        r"""Energy of self, w.r.t. self.

        For Exponential(rate=λ), \mathbb{E}|X-Y| = 1/λ.

        Non-scalar results are summed over axis 1.
        """
        rate = self._bc_params["rate"]
        energy_arr = 1 / rate
        if energy_arr.ndim > 0:
            energy_arr = energy_arr.sum(axis=1)
        return energy_arr

    def _energy_x(self, x):
        r"""Energy of self, w.r.t. a constant frame x.

        Closed form for \mathbb{E}|X - x| with X ~ Exp(rate=λ):
        - if x < 0:  1/λ - x
        - if x >= 0: x - 1/λ + 2 e^{-λ x}/λ

        Both cases are covered by the single expression
        |x| + (2 e^{-λ max(x, 0)} - 1)/λ, which is what is evaluated.

        Non-scalar results are summed over axis 1.
        """
        rate = self._bc_params["rate"]
        # The exponential is evaluated at max(x, 0) only: for x < 0 it equals 1,
        # so the expression reduces to 1/rate - x. Evaluating exp(-rate * x) at
        # large negative x would overflow to inf, and masking that branch by
        # multiplication with False (0 * inf) would produce NaN.
        x_nonneg = np.maximum(x, 0)
        energy_arr = np.abs(x) + (2 * np.exp(-rate * x_nonneg) - 1) / rate
        if energy_arr.ndim > 0:
            energy_arr = energy_arr.sum(axis=1)
        return energy_arr

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the distribution."""
        # 1D array of rates
        params1 = {"rate": [1, 2, 2.5, 3.5, 5]}
        # scalar rate
        params2 = {"rate": 2}
        # 2D array of rates, with explicit index and columns
        params3 = {
            "rate": [
                [2, 2, 2],
                [4, 4, 4],
            ],
            "index": pd.Index([1, 2]),
            "columns": pd.Index(["a", "b", "c"]),
        }

        return [params1, params2, params3]
107 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "skpro"
3 | version = "2.11.0"
4 | description = "A unified framework for tabular probabilistic regression, time-to-event prediction, and probability distributions in python"
5 | authors = [
6 | {name = "skpro developers", email = "info@sktime.net"},
7 | {name = "Franz Király"},
8 | {name = "Frithjof Gressmann"},
9 | {name = "Vitaly Davydov"},
10 | ]
11 | maintainers = [
12 | {name = "skpro developers", email = "info@sktime.net"},
13 | ]
14 | readme = "README.md"
15 | keywords = [
16 | "data-science",
17 | "machine-learning",
18 | "data-mining",
19 | "time-series",
20 | "scikit-learn",
21 | "regression",
22 | ]
23 | classifiers = [
24 | "Intended Audience :: Science/Research",
25 | "Intended Audience :: Developers",
26 | "License :: OSI Approved :: BSD License",
27 | "Programming Language :: Python",
28 | "Topic :: Software Development",
29 | "Topic :: Scientific/Engineering",
30 | "Operating System :: Microsoft :: Windows",
31 | "Operating System :: POSIX",
32 | "Operating System :: Unix",
33 | "Operating System :: MacOS",
34 | "Programming Language :: Python :: 3.10",
35 | "Programming Language :: Python :: 3.11",
36 | "Programming Language :: Python :: 3.12",
37 | "Programming Language :: Python :: 3.13",
38 | "Programming Language :: Python :: 3.14",
39 | ]
40 | requires-python = ">=3.9,<3.15"
41 | dependencies = [
42 | "numpy>=1.21.0,<2.4",
43 | "pandas>=1.1.0,<2.4.0",
44 | "packaging",
45 | "scikit-base>=0.6.1,<0.14.0",
46 | "scikit-learn>=0.24.0,<1.8.0",
47 | "scipy<2.0.0,>=1.2.0",
48 | ]
49 |
50 | [project.optional-dependencies]
51 | all_extras = [
52 | "distfit; python_version < '3.13'",
53 | "lifelines<0.31.0; python_version < '3.13'",
54 | "mapie; python_version < '3.13'",
55 | "matplotlib>=3.3.2",
56 | "ngboost<0.6.0; python_version < '3.13'",
57 | "polars<1.37.0",
58 | "pymc; python_version < '3.13'",
59 | "statsmodels>=0.12.1",
60 | ]
61 |
62 | dev = [
63 | "backoff",
64 | "httpx",
65 | "pre-commit",
66 | "pytest",
67 | "pytest-cov",
68 | "pytest-randomly",
69 | "pytest-timeout",
70 | "pytest-xdist",
71 | "wheel",
72 | ]
73 |
74 | binder = [
75 | "jupyter",
76 | ]
77 |
78 | docs = [
79 | "jupyter",
80 | "myst-parser",
81 | "nbsphinx>=0.8.6",
82 | "numpydoc",
83 | "pydata-sphinx-theme",
84 | "sphinx!=7.2.0,<9.0.0",
85 | "sphinx-design<0.7.0",
86 | "sphinx-issues<6.0.0",
87 | "sphinx-gallery<0.20.0",
88 | "sphinx-panels",
89 | "tabulate",
90 | ]
91 |
92 | [project.urls]
93 | Homepage = "https://github.com/sktime/skpro"
94 | Repository = "https://github.com/sktime/skpro"
95 | Documentation = "https://github.com/sktime/skpro"
96 | Download = "https://pypi.org/project/skpro/#files"
97 | "API Reference" = "https://github.com/sktime/skpro"
98 | "Release Notes" = "https://github.com/sktime/skpro"
99 |
100 | [project.license]
101 | file = "LICENSE"
102 |
103 | [build-system]
104 | requires = ["setuptools>61", "wheel", "toml", "build"]
105 | build-backend = "setuptools.build_meta"
106 |
107 | [tool.nbqa.exclude]
108 | black = "^docs/source/examples/"
109 | flake8 = "^docs/source/examples/"
110 | isort = "^docs/source/examples/"
111 |
112 | [tool.setuptools]
113 | zip-safe = true
114 |
115 | [tool.setuptools.package-data]
116 | sktime = [
117 | "*.csv",
118 | "*.csv.gz",
119 | "*.txt",
120 | ]
121 |
122 | [tool.setuptools.packages.find]
123 | exclude = ["tests", "tests.*"]
124 |
--------------------------------------------------------------------------------
/skpro/distributions/adapters/scipy/_distribution.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Adapter for Scipy Distributions."""
3 |
4 | __author__ = ["malikrafsan"]
5 |
6 | from typing import Union
7 |
8 | import pandas as pd
9 | from scipy.stats import rv_continuous, rv_discrete
10 |
11 | from skpro.distributions.base import BaseDistribution
12 |
13 | __all__ = ["_ScipyAdapter"]
14 |
15 |
class _ScipyAdapter(BaseDistribution):
    """Adapter base class for distributions backed by scipy.

    Subclasses provide the scipy distribution object via ``_get_scipy_object``
    and its parameters via ``_get_scipy_param``. The private distribution
    methods of this class forward to the like-named scipy methods, passing
    those parameters. The class is abstract and should not be instantiated
    directly.
    """

    # name of the instance attribute under which the scipy object is stored
    _distribution_attr = "_dist"
    _tags = {
        "object_type": "distribution",
        "distr:paramtype": "parametric",
    }

    def __init__(self, index=None, columns=None):
        # the scipy object is stored on self before the base class is initialized
        setattr(self, self._distribution_attr, self._get_scipy_object())
        super().__init__(index, columns)

    def _get_scipy_object(self) -> Union[rv_continuous, rv_discrete]:
        """Abstract method to get the scipy distribution object.

        Should import the scipy distribution object and return it.
        """
        raise NotImplementedError("abstract method")

    def _get_scipy_param(self):
        """Abstract method to get the scipy distribution parameters.

        Should return a tuple with two elements: a list of positional arguments (args)
        and a dictionary of keyword arguments (kwds).
        """
        raise NotImplementedError("abstract method")

    def _mean(self):
        """Return the mean, computed by the scipy object's ``mean``."""
        dist = getattr(self, self._distribution_attr)
        pos_args, kw_args = self._get_scipy_param()
        return dist.mean(*pos_args, **kw_args)

    def _var(self):
        """Return the variance, computed by the scipy object's ``var``."""
        dist = getattr(self, self._distribution_attr)
        pos_args, kw_args = self._get_scipy_param()
        return dist.var(*pos_args, **kw_args)

    def _pdf(self, x: pd.DataFrame):
        """Return the probability density function evaluated at x."""
        dist = getattr(self, self._distribution_attr)
        pos_args, kw_args = self._get_scipy_param()
        return dist.pdf(x, *pos_args, **kw_args)

    def _log_pdf(self, x: pd.DataFrame):
        """Return the log of the probability density function evaluated at x."""
        dist = getattr(self, self._distribution_attr)
        pos_args, kw_args = self._get_scipy_param()
        return dist.logpdf(x, *pos_args, **kw_args)

    def _cdf(self, x: pd.DataFrame):
        """Return the cumulative distribution function evaluated at x."""
        dist = getattr(self, self._distribution_attr)
        pos_args, kw_args = self._get_scipy_param()
        return dist.cdf(x, *pos_args, **kw_args)

    def _ppf(self, p: pd.DataFrame):
        """Return the quantile (percent point) function evaluated at p."""
        dist = getattr(self, self._distribution_attr)
        pos_args, kw_args = self._get_scipy_param()
        return dist.ppf(p, *pos_args, **kw_args)

    def _pmf(self, x: pd.DataFrame):
        """Return the probability mass function evaluated at x."""
        dist = getattr(self, self._distribution_attr)
        pos_args, kw_args = self._get_scipy_param()
        return dist.pmf(x, *pos_args, **kw_args)

    def _log_pmf(self, x: pd.DataFrame):
        """Return the log of the probability mass function evaluated at x."""
        dist = getattr(self, self._distribution_attr)
        pos_args, kw_args = self._get_scipy_param()
        return dist.logpmf(x, *pos_args, **kw_args)
91 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. _home:
2 |
3 | ================
4 | Welcome to skpro
5 | ================
6 |
7 | ``skpro`` is a library for supervised probabilistic prediction and
8 | tabular probability distributions in python.
9 |
10 | Features
11 | ========
12 |
13 | ``skpro`` provides unified, ``sklearn`` and ``skbase`` compatible interfaces to:
14 |
15 | * tabular **supervised regressors for probabilistic prediction** - interval, quantile and distribution predictions
16 | * tabular **probabilistic time-to-event and survival prediction** - instance-individual survival distributions
17 | * **metrics to evaluate probabilistic predictions**, e.g., pinball loss, empirical coverage, CRPS
18 | * **reductions** to turn ``sklearn`` regressors into probabilistic ``skpro`` regressors, such as bootstrap or conformal
19 | * building **pipelines and composite models**, including tuning via probabilistic performance metrics
20 | * symbolic **probability distributions** with value domain of ``pandas.DataFrame``-s and ``pandas``-like interface
21 |
22 | Technical specification
23 | =======================
24 |
* In-memory computation on a single machine, no distributed computing
26 | * Medium-sized data in pandas and NumPy based containers
27 | * Modular, principled and object-oriented API
28 | * Using interactive Python interpreter, no command-line interface or graphical user interface
29 |
30 | Contents
31 | ========
32 |
33 | .. toctree::
34 | :maxdepth: 1
35 | :hidden:
36 |
37 | get_started
38 | users
39 | installation
40 | api_reference
41 | get_involved
42 | developer_guide
43 | about
44 | examples
45 |
46 | From here, you can navigate to:
47 |
48 | .. grid:: 1 2 2 2
49 | :gutter: 3
50 |
51 | .. grid-item-card:: Get Started
52 | :text-align: center
53 |
54 | Get started using ``skpro`` quickly.
55 |
56 | +++
57 |
58 | .. button-ref:: get_started
59 | :color: primary
60 | :click-parent:
61 | :expand:
62 |
63 | Get Started
64 |
65 | .. grid-item-card:: User Documentation
66 | :text-align: center
67 |
68 | Find user documentation.
69 |
70 | +++
71 |
72 | .. button-ref:: users
73 | :color: primary
74 | :click-parent:
75 | :expand:
76 |
77 | Users
78 |
79 | .. grid-item-card:: API Reference
80 | :text-align: center
81 |
82 | Understand ``skpro``'s API.
83 |
84 | +++
85 |
86 | .. button-ref:: api_reference
87 | :color: primary
88 | :click-parent:
89 | :expand:
90 |
91 | API Reference
92 |
93 | .. grid-item-card:: Get Involved
94 | :text-align: center
95 |
96 | Find out how you can contribute.
97 |
98 | +++
99 |
100 | .. button-ref:: contribute
101 | :color: primary
102 | :click-parent:
103 | :expand:
104 |
105 | Get Involved
106 |
107 | .. grid-item-card:: Changelog
108 | :text-align: center
109 |
110 | See how the package has changed.
111 |
112 | +++
113 |
114 | .. button-ref:: changelog
115 | :color: primary
116 | :click-parent:
117 | :expand:
118 |
119 | Changelog
120 |
121 | .. grid-item-card:: About
122 | :text-align: center
123 |
124 | Learn more about ``skpro``.
125 |
126 | +++
127 |
128 | .. button-ref:: about
129 | :color: primary
130 | :click-parent:
131 | :expand:
132 |
133 | Learn More
134 |
--------------------------------------------------------------------------------
/skpro/utils/tests/test_plots.py:
--------------------------------------------------------------------------------
1 | """Test functionality of time series plotting functions."""
2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
3 |
4 | import pytest
5 | from skbase.utils.dependencies import _check_soft_dependencies
6 |
7 | from skpro.tests.test_switch import run_test_module_changed
8 |
9 |
@pytest.mark.skipif(
    not (
        run_test_module_changed("skpro.utils")
        and _check_soft_dependencies("matplotlib", severity="none")
    ),
    reason="skip test if required soft dependency for matplotlib not available",
)
def test_plot_crossplot_interval():
    """Smoke test for plot_crossplot_interval, on distribution and interval input."""
    _check_soft_dependencies("matplotlib")

    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LinearRegression

    from skpro.regression.residual import ResidualDouble
    from skpro.utils.plotting import plot_crossplot_interval

    X, y = load_diabetes(return_X_y=True, as_frame=True)

    # probabilistic regressor composed of a mean and a residual estimator
    mean_estimator = LinearRegression()
    residual_estimator = RandomForestRegressor()
    regressor = ResidualDouble(mean_estimator, residual_estimator)
    regressor.fit(X, y)

    # distribution predictions, with explicit coverage and with default coverage
    pred_dist = regressor.predict_proba(X)
    plot_crossplot_interval(y, pred_dist, coverage=0.8)
    plot_crossplot_interval(y, pred_dist)

    # interval predictions passed directly
    pred_interval = regressor.predict_interval(X, coverage=0.7)
    plot_crossplot_interval(y, pred_interval)
39 |
40 |
@pytest.mark.skipif(
    not (
        run_test_module_changed("skpro.utils")
        and _check_soft_dependencies("matplotlib", severity="none")
    ),
    reason="skip test if required soft dependency for matplotlib not available",
)
def test_plot_crossplot_std():
    """Smoke test for plot_crossplot_std, on distribution and variance input."""
    _check_soft_dependencies("matplotlib")

    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LinearRegression

    from skpro.regression.residual import ResidualDouble
    from skpro.utils.plotting import plot_crossplot_std

    X, y = load_diabetes(return_X_y=True, as_frame=True)

    # probabilistic regressor composed of a mean and a residual estimator
    mean_estimator = LinearRegression()
    residual_estimator = RandomForestRegressor()
    regressor = ResidualDouble(mean_estimator, residual_estimator)
    regressor.fit(X, y)

    # distribution predictions
    pred_dist = regressor.predict_proba(X)
    plot_crossplot_std(y, pred_dist)

    # variance predictions passed directly
    pred_var = regressor.predict_var(X)
    plot_crossplot_std(y, pred_var)
69 |
70 |
@pytest.mark.skipif(
    not (
        run_test_module_changed("skpro.utils")
        and _check_soft_dependencies("matplotlib", severity="none")
    ),
    reason="skip test if required soft dependency for matplotlib not available",
)
def test_plot_crossplot_loss():
    """Smoke test for plot_crossplot_loss, using the CRPS metric."""
    _check_soft_dependencies("matplotlib")

    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LinearRegression

    from skpro.metrics import CRPS
    from skpro.regression.residual import ResidualDouble
    from skpro.utils.plotting import plot_crossplot_loss

    X, y = load_diabetes(return_X_y=True, as_frame=True)

    # probabilistic regressor composed of a mean and a residual estimator
    mean_estimator = LinearRegression()
    residual_estimator = RandomForestRegressor()
    regressor = ResidualDouble(mean_estimator, residual_estimator)
    regressor.fit(X, y)

    pred_dist = regressor.predict_proba(X)

    # the loss is passed as a metric object
    metric = CRPS()
    plot_crossplot_loss(y, pred_dist, metric)
98 |
--------------------------------------------------------------------------------
/skpro/distributions/truncated_normal.py:
--------------------------------------------------------------------------------
1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
2 | """Truncated Normal probability distribution."""
3 |
4 | __author__ = ["ShreeshaM07"]
5 |
6 | import pandas as pd
7 | from scipy.stats import rv_continuous, truncnorm
8 |
9 | from skpro.distributions.adapters.scipy import _ScipyAdapter
10 |
11 |
class TruncatedNormal(_ScipyAdapter):
    """A truncated normal probability distribution.

    Most methods wrap ``scipy.stats.truncnorm``.
    The distribution is a normal distribution with mean ``mu`` and
    standard deviation ``sigma``, truncated at the abscissae
    ``l_trunc`` and ``r_trunc``.

    Note: the truncation parameters are passed on the scale of the data;
    internally, they are shifted by ``mu`` and divided by ``sigma``
    before being handed to ``scipy``.

    Parameters
    ----------
    mu : float or array of float (1D or 2D)
        mean of the normal distribution
    sigma : float or array of float (1D or 2D), must be positive
        standard deviation of the normal distribution
    l_trunc : float or array of float (1D or 2D)
        Left truncation abscissa.
    r_trunc : float or array of float (1D or 2D)
        Right truncation abscissa.
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.truncated_normal import TruncatedNormal

    >>> d = TruncatedNormal(
    ...     mu=[[0, 1], [2, 3], [4, 5]],
    ...     sigma=1,
    ...     l_trunc=[[-0.1, 0.5], [1.5, 2.4], [4.1, 5]],
    ...     r_trunc=[[0.8, 2], [4, 5], [5, 7]],
    ... )
    """

    _tags = {
        "capabilities:approx": ["energy", "pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, mu, sigma, l_trunc, r_trunc, index=None, columns=None):
        self.mu = mu
        self.sigma = sigma
        self.l_trunc = l_trunc
        self.r_trunc = r_trunc

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the scipy distribution object wrapped by this class."""
        return truncnorm

    def _get_scipy_param(self):
        """Return scipy positional and keyword arguments for ``truncnorm``.

        The truncation bounds ``a`` and ``b`` are the bounds ``l_trunc`` and
        ``r_trunc``, shifted by ``mu`` and divided by ``sigma``.
        """
        params = self._bc_params
        loc = params["mu"]
        scale = params["sigma"]

        # truncation bounds, centred at mu and scaled by sigma
        lower = (params["l_trunc"] - loc) / scale
        upper = (params["r_trunc"] - loc) / scale

        scipy_kwargs = {
            "loc": loc,
            "scale": scale,
            "a": lower,
            "b": upper,
        }
        return [], scipy_kwargs

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # 2D array parameters, default index and columns
        params1 = {
            "mu": [[0, 1], [2, 3], [4, 5]],
            "sigma": 1,
            "l_trunc": [[-0.1, 0.5], [1.5, 2.4], [4.1, 5]],
            "r_trunc": [[0.8, 2], [4, 5], [5, 7]],
        }
        # scalar and 1D parameters, with explicit index and columns
        params2 = {
            "mu": 0,
            "sigma": 1,
            "l_trunc": [-10, -5],
            "r_trunc": [5, 10],
            "index": pd.Index([1, 2, 5]),
            "columns": pd.Index(["a", "b"]),
        }
        # all parameters scalar
        params3 = {"mu": 1, "sigma": 2, "l_trunc": -3, "r_trunc": 5}
        return [params1, params2, params3]
105 |
--------------------------------------------------------------------------------