├── skpro ├── utils │ ├── _maint │ │ ├── __init__.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_show_versions.py │ │ └── _show_versions.py │ ├── tests │ │ ├── __init__.py │ │ └── test_plots.py │ ├── __init__.py │ ├── deep_equals │ │ └── __init__.py │ ├── index.py │ ├── numpy.py │ ├── pandas.py │ ├── sklearn.py │ ├── random_state.py │ ├── utils.py │ ├── _doctest.py │ └── retrieval.py ├── datatypes │ ├── _adapter │ │ └── __init__.py │ ├── tests │ │ └── __init__.py │ ├── _convert_utils │ │ ├── __init__.py │ │ ├── _coerce.py │ │ └── _convert.py │ ├── _base │ │ └── __init__.py │ ├── _table │ │ ├── __init__.py │ │ ├── _registry.py │ │ └── _base.py │ ├── _proba │ │ ├── __init__.py │ │ └── _registry.py │ ├── __init__.py │ └── _common.py ├── regression │ ├── parametric │ │ └── __init__.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_glum.py │ │ ├── test_ondil.py │ │ ├── test_glm.py │ │ └── test_cyclic_boosting.py │ ├── adapters │ │ ├── __init__.py │ │ ├── ngboost │ │ │ └── __init__.py │ │ └── sklearn │ │ │ └── __init__.py │ ├── gam │ │ └── __init__.py │ ├── gp │ │ └── __init__.py │ ├── jackknife │ │ └── __init__.py │ ├── binned │ │ └── __init__.py │ ├── compose │ │ └── __init__.py │ ├── base │ │ ├── adapters │ │ │ ├── __init__.py │ │ │ └── _sklearn.py │ │ └── __init__.py │ ├── ensemble │ │ └── __init__.py │ ├── online │ │ └── __init__.py │ ├── bayesian │ │ └── __init__.py │ ├── __init__.py │ ├── conformal │ │ └── __init__.py │ └── linear │ │ └── __init__.py ├── tests │ ├── __init__.py │ ├── tests │ │ └── __init__.py │ ├── scenarios │ │ └── __init__.py │ ├── _config_test_dummy.py │ ├── _config.py │ ├── utils.py │ └── _test_vm.py ├── benchmarking │ ├── __init__.py │ └── tests │ │ └── __init__.py ├── metrics │ ├── tests │ │ ├── __init__.py │ │ └── test_distr_metrics.py │ ├── survival │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_c_harrell.py │ │ └── __init__.py │ ├── __init__.py │ └── _coerce.py ├── registry │ ├── tests │ │ ├── __init__.py │ │ ├── test_tags.py │ │ 
└── test_scitype.py │ ├── __init__.py │ └── _scitype.py ├── survival │ ├── __init__.py │ ├── tree │ │ └── __init__.py │ ├── additive │ │ └── __init__.py │ ├── adapters │ │ └── __init__.py │ ├── coxph │ │ └── __init__.py │ ├── compose │ │ └── __init__.py │ ├── aft │ │ └── __init__.py │ ├── ensemble │ │ └── __init__.py │ └── base.py ├── distributions │ ├── base │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_multiindex.py │ │ └── __init__.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_hurdle.py │ │ ├── test_empirical.py │ │ └── test_qpd.py │ ├── adapters │ │ ├── __init__.py │ │ ├── scipy │ │ │ ├── tests │ │ │ │ └── __init__.py │ │ │ ├── __init__.py │ │ │ ├── _empirical.py │ │ │ └── _distribution.py │ │ └── statsmodels │ │ │ ├── __init__.py │ │ │ └── _empirical.py │ ├── compose │ │ └── __init__.py │ ├── trafo │ │ └── __init__.py │ ├── poisson.py │ ├── negative_binomial.py │ ├── geometric.py │ ├── binomial.py │ ├── left_truncated.py │ ├── erlang.py │ ├── loggamma.py │ ├── fisk.py │ ├── halfnormal.py │ ├── inversegamma.py │ ├── alpha.py │ ├── loglaplace.py │ ├── halfcauchy.py │ ├── halflogistic.py │ ├── inversegaussian.py │ ├── __init__.py │ ├── exponential.py │ └── truncated_normal.py ├── __init__.py ├── model_selection │ └── __init__.py └── base │ ├── __init__.py │ └── _base.py ├── docs ├── _static │ ├── .gitignore │ ├── base_api.png │ ├── overview.png │ ├── logo │ │ ├── logo.png │ │ └── skpro-banner.png │ ├── parametric.png │ ├── pymc_example_plot.png │ └── simple_example_plot.png ├── source │ ├── about │ │ ├── contributors.md │ │ ├── roadmap.rst │ │ ├── team.rst │ │ ├── mission.rst │ │ ├── governance.rst │ │ └── history.rst │ ├── contribute │ │ ├── team.rst │ │ └── code_of_conduct.rst │ ├── images │ │ └── skpro-banner.png │ ├── related_software.rst │ ├── _static │ │ ├── class.rst │ │ ├── function.rst │ │ └── class_with_call.rst │ ├── api_reference │ │ ├── base.rst │ │ ├── metrics.rst │ │ ├── utils.rst │ │ └── distributions.rst │ ├── developer_guide │ │ ├── 
add_estimators.rst │ │ └── reviewer_guide.rst │ ├── tutorials.rst │ ├── api_reference.rst │ ├── get_started.rst │ ├── includes │ │ └── api_css.rst │ ├── user_guide.rst │ ├── contribute.rst │ ├── users.rst │ ├── developer_guide.rst │ ├── about.rst │ └── index.rst └── Makefile ├── .github ├── workflows │ ├── cancel.yml │ ├── dependency-review.yml │ ├── update_contributors.yml │ └── wheels.yml ├── ISSUE_TEMPLATE │ ├── report-all-other-issues-or-questions.md │ ├── maintenance-issue.md │ ├── documentation-issue.md │ ├── feature_request.md │ └── bug_report.md ├── dependabot.yml └── .codecov.yml ├── AUTHORS.rst ├── .readthedocs.yml ├── .coveragerc ├── CITATION.rst ├── CODEOWNERS ├── .gitignore ├── .binder └── Dockerfile ├── conftest.py ├── setup.cfg ├── CONTRIBUTING.md ├── LICENSE.txt ├── Makefile ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md └── pyproject.toml /skpro/utils/_maint/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /skpro/datatypes/_adapter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /skpro/regression/parametric/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_static/.gitignore: -------------------------------------------------------------------------------- 1 | # Empty directory 2 | -------------------------------------------------------------------------------- /skpro/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for skpro package.""" 2 | -------------------------------------------------------------------------------- /skpro/utils/tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | """Tests for utilities.""" 2 | -------------------------------------------------------------------------------- /skpro/benchmarking/__init__.py: -------------------------------------------------------------------------------- 1 | """Benchmarking and evaluation.""" 2 | -------------------------------------------------------------------------------- /skpro/datatypes/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for data types module.""" 2 | -------------------------------------------------------------------------------- /skpro/tests/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for the test utilities.""" 2 | -------------------------------------------------------------------------------- /skpro/metrics/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for probabilistic metrics.""" 2 | -------------------------------------------------------------------------------- /skpro/tests/scenarios/__init__.py: -------------------------------------------------------------------------------- 1 | """Test scenarios for estimators.""" 2 | -------------------------------------------------------------------------------- /docs/source/about/contributors.md: -------------------------------------------------------------------------------- 1 | ```{include} ../../../CONTRIBUTORS.md 2 | ``` 3 | -------------------------------------------------------------------------------- /skpro/utils/_maint/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for the show_versions utility.""" 2 | -------------------------------------------------------------------------------- /skpro/benchmarking/tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | """Tests for benchmarking and evaluation.""" 2 | -------------------------------------------------------------------------------- /skpro/datatypes/_convert_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Conversion auxiliary utilities.""" 2 | -------------------------------------------------------------------------------- /skpro/registry/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for registry and lookup functionality.""" 2 | -------------------------------------------------------------------------------- /skpro/survival/__init__.py: -------------------------------------------------------------------------------- 1 | """Survival or time-to-event prediction estimators.""" 2 | -------------------------------------------------------------------------------- /docs/source/contribute/team.rst: -------------------------------------------------------------------------------- 1 | .. _contrib_team: 2 | 3 | .. 
include:: ../about/team.rst 4 | -------------------------------------------------------------------------------- /skpro/distributions/base/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for skpro distribution base class.""" 2 | -------------------------------------------------------------------------------- /skpro/regression/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for probabilistic supervised regressors.""" 2 | -------------------------------------------------------------------------------- /docs/_static/base_api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/base_api.png -------------------------------------------------------------------------------- /docs/_static/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/overview.png -------------------------------------------------------------------------------- /skpro/distributions/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for skpro probability distribution objects.""" 2 | -------------------------------------------------------------------------------- /docs/_static/logo/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/logo/logo.png -------------------------------------------------------------------------------- /docs/_static/parametric.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/parametric.png -------------------------------------------------------------------------------- 
/docs/_static/logo/skpro-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/logo/skpro-banner.png -------------------------------------------------------------------------------- /docs/_static/pymc_example_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/pymc_example_plot.png -------------------------------------------------------------------------------- /docs/source/images/skpro-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/source/images/skpro-banner.png -------------------------------------------------------------------------------- /docs/_static/simple_example_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sktime/skpro/HEAD/docs/_static/simple_example_plot.png -------------------------------------------------------------------------------- /docs/source/related_software.rst: -------------------------------------------------------------------------------- 1 | .. 
_related_software: 2 | 3 | ================ 4 | Related Software 5 | ================ 6 | 7 | TODO 8 | -------------------------------------------------------------------------------- /skpro/regression/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | """Adapters for probabilistic regressors.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | -------------------------------------------------------------------------------- /skpro/__init__.py: -------------------------------------------------------------------------------- 1 | """skpro.""" 2 | 3 | __version__ = "2.11.0" 4 | 5 | __all__ = ["show_versions"] 6 | 7 | from skpro.utils._maint._show_versions import show_versions 8 | -------------------------------------------------------------------------------- /skpro/distributions/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | """Adapters for probability distribution objects.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | -------------------------------------------------------------------------------- /skpro/survival/tree/__init__.py: -------------------------------------------------------------------------------- 1 | """Cox proportional hazards models.""" 2 | 3 | from skpro.survival.tree._tree_sksurv import SurvivalTree 4 | 5 | __all__ = ["SurvivalTree"] 6 | -------------------------------------------------------------------------------- /skpro/metrics/survival/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for metrics for time-to-event or survival prediction.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | -------------------------------------------------------------------------------- /skpro/survival/additive/__init__.py: 
-------------------------------------------------------------------------------- 1 | """Generalized additive survival models.""" 2 | 3 | __all__ = ["AalenAdditive"] 4 | 5 | from skpro.survival.additive._aalen_lifelines import AalenAdditive 6 | -------------------------------------------------------------------------------- /skpro/distributions/adapters/scipy/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for adapters for probability distribution objects, scipy facing.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | -------------------------------------------------------------------------------- /skpro/model_selection/__init__.py: -------------------------------------------------------------------------------- 1 | """Tuning and model selection.""" 2 | 3 | __all__ = ["GridSearchCV", "RandomizedSearchCV"] 4 | 5 | from skpro.model_selection._tuning import GridSearchCV, RandomizedSearchCV 6 | -------------------------------------------------------------------------------- /skpro/datatypes/_base/__init__.py: -------------------------------------------------------------------------------- 1 | """Base module for datatypes.""" 2 | 3 | from skpro.datatypes._base._base import BaseConverter, BaseDatatype, BaseExample 4 | 5 | __all__ = ["BaseConverter", "BaseDatatype", "BaseExample"] 6 | -------------------------------------------------------------------------------- /docs/source/_static/class.rst: -------------------------------------------------------------------------------- 1 | {{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | .. raw:: html 9 | 10 |
11 | -------------------------------------------------------------------------------- /skpro/regression/gam/__init__.py: -------------------------------------------------------------------------------- 1 | """GAM regressor using pyGAM.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | from skpro.regression.gam._gam import GAMRegressor 5 | 6 | __all__ = ["GAMRegressor"] 7 | -------------------------------------------------------------------------------- /docs/source/_static/function.rst: -------------------------------------------------------------------------------- 1 | {{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. raw:: html 9 | 10 |
11 | -------------------------------------------------------------------------------- /skpro/regression/gp/__init__.py: -------------------------------------------------------------------------------- 1 | """Gaussian process models.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | from skpro.regression.gp._sklearn import GaussianProcess 5 | 6 | __all__ = ["GaussianProcess"] 7 | -------------------------------------------------------------------------------- /skpro/regression/jackknife/__init__.py: -------------------------------------------------------------------------------- 1 | """MAPIE Jackknife Regressors.""" 2 | 3 | from skpro.regression.jackknife._mapie_jackknife import ( 4 | MapieJackknifeAfterBootstrapRegressor, 5 | ) 6 | 7 | __all__ = ["MapieJackknifeAfterBootstrapRegressor"] 8 | -------------------------------------------------------------------------------- /skpro/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Utility functionality.""" 2 | 3 | from skpro.utils.deep_equals import deep_equals 4 | from skpro.utils.estimator_checks import check_estimator 5 | 6 | __all__ = [ 7 | "check_estimator", 8 | "deep_equals", 9 | ] 10 | -------------------------------------------------------------------------------- /skpro/distributions/compose/__init__.py: -------------------------------------------------------------------------------- 1 | """Probability distribution objects.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | # adapted from sktime 4 | 5 | __all__ = ["IID"] 6 | 7 | from skpro.distributions.compose._iid import IID 8 | -------------------------------------------------------------------------------- /skpro/utils/deep_equals/__init__.py: -------------------------------------------------------------------------------- 1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 2 | """Module for nested equality 
checking.""" 3 | from skpro.utils.deep_equals._deep_equals import deep_equals 4 | 5 | __all__ = [ 6 | "deep_equals", 7 | ] 8 | -------------------------------------------------------------------------------- /docs/source/_static/class_with_call.rst: -------------------------------------------------------------------------------- 1 | {{objname}} 2 | {{ underline }}=============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | :special-members: __call__ 8 | 9 | .. raw:: html 10 | 11 |
12 | -------------------------------------------------------------------------------- /skpro/tests/_config_test_dummy.py: -------------------------------------------------------------------------------- 1 | """Test dummy for testing config skips.""" 2 | 3 | 4 | from skpro.regression.base import BaseProbaRegressor # noqa: E402 5 | 6 | 7 | class DummySkipped(BaseProbaRegressor): 8 | """Dummy regressor to test exclusion.""" 9 | 10 | pass 11 | -------------------------------------------------------------------------------- /skpro/survival/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | """Module containing adapters other framework packages covering multiple tasks.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | __all__ = ["_SksurvAdapter"] 5 | 6 | from skpro.survival.adapters.sksurv import _SksurvAdapter 7 | -------------------------------------------------------------------------------- /skpro/regression/binned/__init__.py: -------------------------------------------------------------------------------- 1 | """Reduction to probabilistic classification.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | from skpro.regression.binned._sklearn_bin_regressor import HistBinnedProbaRegressor 5 | 6 | __all__ = ["HistBinnedProbaRegressor"] 7 | -------------------------------------------------------------------------------- /skpro/distributions/trafo/__init__.py: -------------------------------------------------------------------------------- 1 | """Probability distribution objects.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | # adapted from sktime 4 | 5 | __all__ = ["TransformedDistribution"] 6 | 7 | from skpro.distributions.trafo._transformed import TransformedDistribution 8 | -------------------------------------------------------------------------------- /skpro/regression/adapters/ngboost/__init__.py: 
-------------------------------------------------------------------------------- 1 | """Adapters for probabilistic regressors, towards sklearn.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | from skpro.regression.adapters.ngboost._ngboost_proba import NGBoostAdapter 5 | 6 | __all__ = ["NGBoostAdapter"] 7 | -------------------------------------------------------------------------------- /skpro/regression/adapters/sklearn/__init__.py: -------------------------------------------------------------------------------- 1 | """Adapters for probabilistic regressors, towards sklearn.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | from skpro.regression.adapters.sklearn._sklearn_proba import SklearnProbaReg 5 | 6 | __all__ = ["SklearnProbaReg"] 7 | -------------------------------------------------------------------------------- /docs/source/about/roadmap.rst: -------------------------------------------------------------------------------- 1 | .. _roadmap: 2 | 3 | ======= 4 | Roadmap 5 | ======= 6 | 7 | Welcome to ``skpro``'s roadmap. 8 | 9 | .. note:: 10 | 11 | The project is under active planning and development. We will continue to update 12 | our roadmap as the project matures and we plan future work. 
13 | -------------------------------------------------------------------------------- /skpro/base/__init__.py: -------------------------------------------------------------------------------- 1 | """Base module with base classes BaseObject, BaseEstimator.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | __all__ = ["BaseEstimator", "BaseMetaEstimator", "BaseObject"] 5 | 6 | from skpro.base._base import BaseEstimator, BaseMetaEstimator, BaseObject 7 | -------------------------------------------------------------------------------- /skpro/distributions/adapters/statsmodels/__init__.py: -------------------------------------------------------------------------------- 1 | """Adapters for probability distribution objects, statsmodels facing.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | from skpro.distributions.adapters.statsmodels._empirical import empirical_from_rvdf 5 | 6 | __all__ = ["empirical_from_rvdf"] 7 | -------------------------------------------------------------------------------- /.github/workflows/cancel.yml: -------------------------------------------------------------------------------- 1 | name: Cancel 2 | on: 3 | workflow_run: 4 | workflows: ["Test"] 5 | types: 6 | - requested 7 | jobs: 8 | cancel: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: styfle/cancel-workflow-action@0.12.1 12 | with: 13 | workflow_id: ${{ github.event.workflow.id }} 14 | -------------------------------------------------------------------------------- /skpro/regression/compose/__init__.py: -------------------------------------------------------------------------------- 1 | """Composition and pipelines for probabilistic supervised regression.""" 2 | 3 | from skpro.regression.compose._pipeline import Pipeline 4 | from skpro.regression.compose._ttr import TransformedTargetRegressor 5 | 6 | __all__ = [ 7 | "Pipeline", 8 | "TransformedTargetRegressor", 9 | ] 10 | 
-------------------------------------------------------------------------------- /skpro/regression/base/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | """Base classes for adapting probabilistic regressors to the skpro framework.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | __all__ = ["_DelegateWithFittedParamForwarding"] 5 | 6 | from skpro.regression.base.adapters._sklearn import _DelegateWithFittedParamForwarding 7 | -------------------------------------------------------------------------------- /skpro/regression/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | """Natural Gradient Boosting Regressor models.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | from skpro.regression.ensemble._bagging import BaggingRegressor 5 | from skpro.regression.ensemble._ngboost import NGBoostRegressor 6 | 7 | __all__ = ["BaggingRegressor", "NGBoostRegressor"] 8 | -------------------------------------------------------------------------------- /docs/source/about/team.rst: -------------------------------------------------------------------------------- 1 | .. _team: 2 | 3 | ================ 4 | Development Team 5 | ================ 6 | 7 | This package is currently maintained by the ``sktime`` community, see 8 | `sktime team `_. 9 | 10 | This project is currently to be considered part of ``sktime``, 11 | and not a separate entity. 
12 | -------------------------------------------------------------------------------- /skpro/regression/base/__init__.py: -------------------------------------------------------------------------------- 1 | """Base classes for probabilistic regression.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | __all__ = ["BaseProbaRegressor", "_DelegatedProbaRegressor"] 5 | 6 | from skpro.regression.base._base import BaseProbaRegressor 7 | from skpro.regression.base._delegate import _DelegatedProbaRegressor 8 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Developers 3 | ========== 4 | 5 | **skpro** is developed by the sktime community. 6 | 7 | We follow the all-contributors specification for giving credit. 8 | Contributions of any kind are welcome! 9 | 10 | For a list of contributors, see the file 11 | `all-contributorsrc `_. 
12 | -------------------------------------------------------------------------------- /skpro/survival/coxph/__init__.py: -------------------------------------------------------------------------------- 1 | """Cox proportional hazards models.""" 2 | 3 | from skpro.survival.coxph._coxnet_sksurv import CoxNet 4 | from skpro.survival.coxph._coxph_lifelines import CoxPHlifelines 5 | from skpro.survival.coxph._coxph_sksurv import CoxPHSkSurv 6 | from skpro.survival.coxph._coxph_statsmodels import CoxPH 7 | 8 | __all__ = ["CoxNet", "CoxPH", "CoxPHlifelines", "CoxPHSkSurv"] 9 | -------------------------------------------------------------------------------- /skpro/metrics/survival/__init__.py: -------------------------------------------------------------------------------- 1 | """Metrics for time-to-event or survival prediction.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | __author__ = ["fkiraly"] 5 | 6 | __all__ = [ 7 | "ConcordanceHarrell", 8 | "SPLL", 9 | ] 10 | 11 | from skpro.metrics.survival._c_harrell import ConcordanceHarrell 12 | from skpro.metrics.survival._spll import SPLL 13 | -------------------------------------------------------------------------------- /skpro/datatypes/_table/__init__.py: -------------------------------------------------------------------------------- 1 | """Module exports: Series type checkers, converters and mtype inference.""" 2 | 3 | from skpro.datatypes._table._convert import convert_dict as convert_dict_Table 4 | from skpro.datatypes._table._registry import MTYPE_LIST_TABLE, MTYPE_REGISTER_TABLE 5 | 6 | __all__ = [ 7 | "convert_dict_Table", 8 | "MTYPE_LIST_TABLE", 9 | "MTYPE_REGISTER_TABLE", 10 | ] 11 | -------------------------------------------------------------------------------- /skpro/survival/compose/__init__.py: -------------------------------------------------------------------------------- 1 | """Survival or time-to-event prediction estimators, composers.""" 2 | 3 | from 
skpro.regression.compose import Pipeline 4 | from skpro.survival.compose._reduce_cond_unc import ConditionUncensored 5 | from skpro.survival.compose._reduce_uncensored import FitUncensored 6 | 7 | __all__ = [ 8 | "Pipeline", 9 | "FitUncensored", 10 | "ConditionUncensored", 11 | ] 12 | -------------------------------------------------------------------------------- /skpro/distributions/adapters/scipy/__init__.py: -------------------------------------------------------------------------------- 1 | """Adapters for probability distribution objects, scipy facing.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | from skpro.distributions.adapters.scipy._distribution import _ScipyAdapter 5 | from skpro.distributions.adapters.scipy._empirical import empirical_from_discrete 6 | 7 | __all__ = ["empirical_from_discrete", "_ScipyAdapter"] 8 | -------------------------------------------------------------------------------- /skpro/survival/aft/__init__.py: -------------------------------------------------------------------------------- 1 | """Module containing accelerated failure time models.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | __all__ = ["AFTFisk", "AFTLogNormal", "AFTWeibull"] 5 | 6 | from skpro.survival.aft._aft_lifelines_fisk import AFTFisk 7 | from skpro.survival.aft._aft_lifelines_lognormal import AFTLogNormal 8 | from skpro.survival.aft._aft_lifelines_weibull import AFTWeibull 9 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | python: 7 | install: 8 | - method: pip 9 | path: . 
10 | extra_requirements: 11 | - docs 12 | build: 13 | os: ubuntu-22.04 14 | tools: 15 | python: "3.11" 16 | 17 | sphinx: 18 | configuration: docs/source/conf.py 19 | # fail_on_warning: True 20 | -------------------------------------------------------------------------------- /skpro/regression/online/__init__.py: -------------------------------------------------------------------------------- 1 | """Meta-algorithms to build online regression models.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | from skpro.regression.online._dont_refit import OnlineDontRefit 5 | from skpro.regression.online._refit import OnlineRefit 6 | from skpro.regression.online._refit_every import OnlineRefitEveryN 7 | 8 | __all__ = ["OnlineDontRefit", "OnlineRefit", "OnlineRefitEveryN"] 9 | -------------------------------------------------------------------------------- /skpro/regression/bayesian/__init__.py: -------------------------------------------------------------------------------- 1 | """Base classes for Bayesian probabilistic regression.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | __all__ = [ 5 | "BayesianConjugateLinearRegressor", 6 | "BayesianLinearRegressor", 7 | ] 8 | 9 | from skpro.regression.bayesian._linear_conjugate import BayesianConjugateLinearRegressor 10 | from skpro.regression.bayesian._linear_mcmc import BayesianLinearRegressor 11 | -------------------------------------------------------------------------------- /docs/source/about/mission.rst: -------------------------------------------------------------------------------- 1 | .. _mission: 2 | 3 | ======= 4 | Mission 5 | ======= 6 | 7 | The goal of the ``skpro`` project is to provide a unified package for 8 | using, building, and evaluating predictive probabilistic machine learning models, 9 | following `scikit-learn`_ and `sktime`_ design principles. 
10 | 11 | The wider (non-technical) mission is identical with that of `sktime`_: 12 | 13 | .. _sktime: https://www.sktime.net/en/stable/index.html 14 | -------------------------------------------------------------------------------- /docs/source/api_reference/base.rst: -------------------------------------------------------------------------------- 1 | .. _base_ref: 2 | 3 | Base 4 | ==== 5 | 6 | The :mod:`skpro.base` module contains abstract base classes. 7 | 8 | .. automodule:: skpro.base 9 | :no-members: 10 | :no-inherited-members: 11 | 12 | Base classes 13 | ------------ 14 | 15 | .. currentmodule:: skpro.base 16 | 17 | .. autosummary:: 18 | :toctree: auto_generated/ 19 | :template: class.rst 20 | 21 | BaseObject 22 | BaseEstimator 23 | BaseMetaEstimator 24 | -------------------------------------------------------------------------------- /docs/source/developer_guide/add_estimators.rst: -------------------------------------------------------------------------------- 1 | .. _developer_guide_add_estimators: 2 | 3 | ======================= 4 | Implementing Estimators 5 | ======================= 6 | 7 | ``skpro`` follows the same extension principles as ``sktime`` - we advise to read the ``sktime`` documentation on this topic: 8 | `here `__ 9 | 10 | The same workflows apply, using ``skpro`` extension templates and ``check_estimator``. 
11 | -------------------------------------------------------------------------------- /skpro/distributions/base/__init__.py: -------------------------------------------------------------------------------- 1 | """Probability distribution objects.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | # adapted from sktime 4 | 5 | __all__ = ["BaseDistribution", "_DelegatedDistribution", "_BaseArrayDistribution"] 6 | 7 | from skpro.distributions.base._base import BaseDistribution 8 | from skpro.distributions.base._base_array import _BaseArrayDistribution 9 | from skpro.distributions.base._delegate import _DelegatedDistribution 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/report-all-other-issues-or-questions.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Report all other issues or questions 3 | about: Let us know about anything else not covered by one of our specific issue types. 4 | title: '' 5 | labels: needs triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the issue** 11 | 15 | -------------------------------------------------------------------------------- /docs/source/tutorials.rst: -------------------------------------------------------------------------------- 1 | .. _tutorials: 2 | 3 | Tutorials 4 | ========= 5 | 6 | Below are introductory tutorials for ``skpro``. 7 | 8 | Each tutorial is located in its own repository, which contains notebooks and links to a YouTube video walkthrough. 9 | 10 | .. note:: 11 | 12 | There are no video tutorials yet! Stay tuned... 13 | 14 | For user guides specific to learning tasks, see our :ref:`user_guide` page. 15 | The user guide notebooks are always functional with the most recent stable version. 
16 | -------------------------------------------------------------------------------- /skpro/datatypes/_proba/__init__.py: -------------------------------------------------------------------------------- 1 | """Type checkers, converters and mtype inference for probabilistic return types.""" 2 | 3 | from skpro.datatypes._proba._check import check_dict as check_dict_Proba 4 | from skpro.datatypes._proba._convert import convert_dict as convert_dict_Proba 5 | from skpro.datatypes._proba._registry import MTYPE_LIST_PROBA, MTYPE_REGISTER_PROBA 6 | 7 | __all__ = [ 8 | "check_dict_Proba", 9 | "convert_dict_Proba", 10 | "MTYPE_LIST_PROBA", 11 | "MTYPE_REGISTER_PROBA", 12 | ] 13 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | commit-message: 8 | prefix: "[MNT] [Dependabot]" 9 | include: "scope" 10 | labels: 11 | - "maintenance" 12 | - package-ecosystem: "github-actions" 13 | directory: "/" 14 | schedule: 15 | interval: "daily" 16 | commit-message: 17 | prefix: "[MNT] [Dependabot]" 18 | include: "scope" 19 | labels: 20 | - "maintenance" 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/maintenance-issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Maintenance issue 3 | about: Suggest a maintenance update 4 | title: "[MNT]" 5 | labels: maintenance 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the maintenance issue** 11 | 12 | 17 | -------------------------------------------------------------------------------- /skpro/regression/__init__.py: -------------------------------------------------------------------------------- 1 | """Probabilistic supervised regression estimators.""" 2 | 3 | from 
skpro.regression.conformal import ( 4 | MapieConformalizedQuantileRegressor, 5 | MapieCrossConformalRegressor, 6 | MapieSplitConformalRegressor, 7 | ) 8 | from skpro.regression.jackknife import MapieJackknifeAfterBootstrapRegressor 9 | 10 | __all__ = [ 11 | "MapieSplitConformalRegressor", 12 | "MapieCrossConformalRegressor", 13 | "MapieConformalizedQuantileRegressor", 14 | "MapieJackknifeAfterBootstrapRegressor", 15 | ] 16 | -------------------------------------------------------------------------------- /skpro/regression/conformal/__init__.py: -------------------------------------------------------------------------------- 1 | """MAPIE Conformal Regressors.""" 2 | 3 | from skpro.regression.conformal._mapie_cqr import MapieConformalizedQuantileRegressor 4 | from skpro.regression.conformal._mapie_cross_conformal import ( 5 | MapieCrossConformalRegressor, 6 | ) 7 | from skpro.regression.conformal._mapie_split_conformal import ( 8 | MapieSplitConformalRegressor, 9 | ) 10 | 11 | __all__ = [ 12 | "MapieSplitConformalRegressor", 13 | "MapieCrossConformalRegressor", 14 | "MapieConformalizedQuantileRegressor", 15 | ] 16 | -------------------------------------------------------------------------------- /docs/source/api_reference.rst: -------------------------------------------------------------------------------- 1 | .. _api_reference: 2 | 3 | ============= 4 | API Reference 5 | ============= 6 | 7 | Welcome to the API reference for ``skpro``. 8 | 9 | The API reference provides a technical manual. 10 | It describes the classes and functions included in ``skpro``. 11 | 12 | .. include:: includes/api_css.rst 13 | 14 | .. 
toctree:: 15 | :maxdepth: 1 16 | 17 | api_reference/regression 18 | api_reference/survival 19 | api_reference/distributions 20 | api_reference/metrics 21 | api_reference/base 22 | api_reference/utils 23 | -------------------------------------------------------------------------------- /skpro/registry/__init__.py: -------------------------------------------------------------------------------- 1 | """Registry and lookup functionality.""" 2 | 3 | from skpro.registry._craft import craft, deps, imports 4 | from skpro.registry._lookup import all_objects, all_tags 5 | from skpro.registry._scitype import scitype 6 | from skpro.registry._tags import ( 7 | OBJECT_TAG_LIST, 8 | OBJECT_TAG_REGISTER, 9 | check_tag_is_valid, 10 | ) 11 | 12 | __all__ = [ 13 | "OBJECT_TAG_LIST", 14 | "OBJECT_TAG_REGISTER", 15 | "all_objects", 16 | "all_tags", 17 | "check_tag_is_valid", 18 | "craft", 19 | "deps", 20 | "imports", 21 | "scitype", 22 | ] 23 | -------------------------------------------------------------------------------- /skpro/survival/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | """Cox proportional hazards models.""" 2 | 3 | from skpro.survival.ensemble._grad_boost_sksurv import ( 4 | SurvGradBoostCompSkSurv, 5 | SurvGradBoostSkSurv, 6 | ) 7 | from skpro.survival.ensemble._ngboost_surv import NGBoostSurvival 8 | from skpro.survival.ensemble._survforest_sksurv import ( 9 | SurvivalForestSkSurv, 10 | SurvivalForestXtraSkSurv, 11 | ) 12 | 13 | __all__ = [ 14 | "SurvGradBoostSkSurv", 15 | "SurvGradBoostCompSkSurv", 16 | "SurvivalForestSkSurv", 17 | "SurvivalForestXtraSkSurv", 18 | "NGBoostSurvival", 19 | ] 20 | -------------------------------------------------------------------------------- /skpro/tests/_config.py: -------------------------------------------------------------------------------- 1 | """Test configs.""" 2 | 3 | # -------------------- 4 | # configs for test run 5 | # -------------------- 6 | 7 | # whether to 
test only estimators from modules that are changed w.r.t. main 8 | # default is False, can be set to True by pytest --only_changed_modules True flag 9 | ONLY_CHANGED_MODULES = False 10 | 11 | 12 | # list of str, names of estimators to exclude from testing 13 | # WARNING: tests for these estimators will be skipped 14 | EXCLUDE_ESTIMATORS = [ 15 | "DummySkipped", 16 | "ClassName", # exclude classes from extension templates 17 | ] 18 | 19 | 20 | EXCLUDED_TESTS = {} 21 | -------------------------------------------------------------------------------- /skpro/datatypes/_proba/_registry.py: -------------------------------------------------------------------------------- 1 | """Registry of mtypes for Proba scitype. 2 | 3 | See datatypes._registry for API. 4 | """ 5 | 6 | import pandas as pd 7 | 8 | __all__ = [ 9 | "MTYPE_REGISTER_PROBA", 10 | "MTYPE_LIST_PROBA", 11 | ] 12 | 13 | 14 | MTYPE_REGISTER_PROBA = [ 15 | ("pred_interval", "Proba", "predictive intervals"), 16 | ("pred_quantiles", "Proba", "quantile predictions"), 17 | ("pred_var", "Proba", "variance predictions"), 18 | # ("pred_dost", "Proba", "full distribution predictions, tensorflow-probability"), 19 | ] 20 | 21 | MTYPE_LIST_PROBA = pd.DataFrame(MTYPE_REGISTER_PROBA)[0].values 22 | -------------------------------------------------------------------------------- /.github/.codecov.yml: -------------------------------------------------------------------------------- 1 | # paths to ignore 2 | ignore: 3 | - "docs/**/*" 4 | - "build_tools/**/*" 5 | - "examples/*" 6 | - ".github/*" 7 | - ".binder/*" 8 | - "extension_templates/*" 9 | - "*.md" 10 | - "*.yml" 11 | - "*.yaml" 12 | 13 | # PR status check 14 | coverage: 15 | status: 16 | project: 17 | default: 18 | # threshold: 1% 19 | informational: true 20 | patch: 21 | default: 22 | informational: true 23 | 24 | # post coverage report as comment on PR 25 | comment: false 26 | 27 | # enable codecov to report to GitHub status checks 28 | github_checks: 29 | annotations: 
def random_ss_ix(ix, size, replace=True):
    """Draw a uniform random subsample from a collection of indices.

    Integer positions ``0, ..., len(ix) - 1`` are sampled with
    ``np.random.choice`` (using numpy's global random state), and ``ix``
    is then subset by the sampled positions.

    Parameters
    ----------
    ix : pd.Index or subsettable iterable via getitem
        indices to sample from; must support ``len`` and subsetting by the
        positions returned from ``np.random.choice``
    size : int
        number of indices to sample
    replace : bool, default=True
        whether to sample with replacement

    Returns
    -------
    ixs : same container type as produced by ``ix[...]``
        the sampled indices
    """
    positions = np.random.choice(range(len(ix)), size=size, replace=replace)
    return ix[positions]
Retrieved from http://arxiv.org/abs/1801.00753 :: 2 | 3 | @article{skpro, 4 | archivePrefix = {arXiv}, 5 | eprinttype = {arxiv}, 6 | eprint = {1801.00753}, 7 | primaryClass = {cs, math, stat}, 8 | title = {Probabilistic Supervised Learning}, 9 | url = {http://arxiv.org/abs/1801.00753}, 10 | urldate = {2018-01-03}, 11 | date = {2018-01-02}, 12 | author = {Gressmann, Frithjof and Kir{\'a}ly, Franz J. and Mateen, Bilal and Oberhauser, Harald} 13 | } 14 | -------------------------------------------------------------------------------- /docs/source/about/governance.rst: -------------------------------------------------------------------------------- 1 | .. _governance: 2 | 3 | ========== 4 | Governance 5 | ========== 6 | 7 | .. topic:: This project is part of the ``sktime`` project. 8 | 9 | The ``skpro`` repository and community is currently to be considered part of 10 | ``sktime``, and not a separate entity. It is maintained by the ``sktime`` team. 11 | It is THEREFORE subject to rules and provisions of ``sktime``, 12 | see `sktime governance `_. 13 | The below are draft documents for a potentially later stage, copied from ``sktime``. 14 | In case of discrepancy, ``sktime`` documents apply. 
def flatten_to_1D_if_colvector(y):
    """Collapse a 2D single-column array to 1D, leaving anything else as is.

    Parameters
    ----------
    y : numpy array, 1D or 2D
        Array to flatten

    Returns
    -------
    y_flat : numpy array
        ``y.flatten()`` if ``y`` has exactly two dimensions and one column;
        otherwise the very same object ``y`` is returned unchanged
    """
    shape = y.shape
    is_column_vector = len(shape) == 2 and shape[1] == 1
    return y.flatten() if is_column_vector else y
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | # The file specifies framework level core developers for automated review requests 2 | # 3 | # Note: historically, this file has been used to maintain a list of 4 | # algorithm maintainers as specified in GOVERNANCE.md. 5 | # This is no longer the case, algorithm maintainers are now 6 | # specified directly in the estimator, 7 | # in the "maintainers" tag of the respective scikit-base object. 8 | # 9 | # Algorithm maintainers are programmatically queryable 10 | # via Estimator.get_class_tag("maintainers"). 11 | # Further lookup such as "which algorithms does M maintain" 12 | # can be carried out using registry.all_objects 13 | 14 | * @benheid @felipeangelimvieira @fkiraly @fnhirwa @geetu040 @pranavvp16 @sairevanth25 @XinyuWuu 15 | -------------------------------------------------------------------------------- /docs/source/get_started.rst: -------------------------------------------------------------------------------- 1 | .. _getting_started: 2 | 3 | =========== 4 | Get Started 5 | =========== 6 | 7 | The following information is designed to get users up and running with 8 | ``skpro`` quickly. For more detailed information, see the links in each 9 | of the subsections. 10 | 11 | Installation 12 | ============ 13 | 14 | ``skpro`` currently supports: 15 | 16 | * environments with python version 3.8, 3.9, 3.10, 3.11, or 3.12. 17 | * operating systems Mac OS X, Unix-like OS, Windows 8.1 and higher 18 | * installation via ``PyPi`` or ``conda`` 19 | 20 | Please see the :ref:`installation ` guide for step-by-step instructions on the package installation. 21 | 22 | .. 
def df_map(x):
    """Access map or applymap, of DataFrame.

    In pandas 2.1.0, applymap was deprecated in favor of the newly introduced map.
    To ensure compatibility with older versions, we use map if available,
    otherwise applymap.

    Parameters
    ----------
    x : assumed pd.DataFrame

    Returns
    -------
    x.map, if the type of x defines a callable ``map``, otherwise x.applymap
        Note: returns method itself, not result of method call
    """
    # Check the class rather than the instance: on pandas < 2.1 a DataFrame
    # with a column named "map" exposes that column through attribute access,
    # so ``hasattr(x, "map")`` would succeed and hand back a column instead
    # of the elementwise-apply method.
    if callable(getattr(type(x), "map", None)):
        return x.map
    return x.applymap
def assert_close_prediction(y_hat, y_true, fraction=0.75, within=0.25):
    """Check that defined fraction of predictions lies in a certain tolerance.

    Parameters
    ----------
    y_hat : array-like
        Predictions
    y_true : array-like
        True values
    fraction : float, default=0.75
        Minimum fraction of predictions that must be close to the true values
    within : float, default=0.25
        Relative tolerance, passed as ``rtol`` to ``np.isclose``
        (the default absolute tolerance of ``np.isclose`` also applies)

    Raises
    ------
    AssertionError
        if fewer than ``fraction * len(y_true)`` predictions are close
    """
    n_close = np.count_nonzero(np.isclose(y_hat, y_true, rtol=within))
    target = len(y_true) * fraction

    # ">=" rather than ">": with a strict comparison, fraction=1.0 could never
    # be satisfied even when every single prediction is within tolerance.
    # Raised explicitly (not via ``assert``) so the check survives ``python -O``.
    if n_close < target:
        raise AssertionError(
            f"only {n_close} of {len(y_true)} predictions are within relative "
            f"tolerance {within}, expected at least {target}"
        )
def prep_skl_df(df, copy_df=False):
    """Make df compatible with sklearn input expectations.

    Changes:
        column index is replaced by its string-cast version

    Parameters
    ----------
    df : pd.DataFrame
        data frame whose columns should be strings
    copy_df : bool, default=False
        whether to mutate df or return a copy
        if False, the columns of df are overwritten in place
        if True, original df is not mutated. If the string-cast columns differ
        from the current columns, a copy is made and the copy is changed.
        Otherwise, the original df is returned.

    Returns
    -------
    df : pd.DataFrame
        the input object itself, or a copy of it, with string-cast columns
    """
    columns = df.columns
    columns_as_str = columns.astype(str)

    # nothing to do if casting to str leaves every column label unchanged
    if np.all(columns_as_str == columns):
        return df

    result = df.copy() if copy_df else df
    result.columns = columns_as_str
    return result
4 | """ 5 | 6 | import pandas as pd 7 | 8 | __all__ = [ 9 | "MTYPE_REGISTER_TABLE", 10 | "MTYPE_LIST_TABLE", 11 | ] 12 | 13 | 14 | MTYPE_REGISTER_TABLE = [ 15 | ("pd_DataFrame_Table", "Table", "pd.DataFrame representation of a data table"), 16 | ("numpy1D", "Table", "1D np.narray representation of a univariate table"), 17 | ("numpy2D", "Table", "2D np.narray representation of a univariate table"), 18 | ("pd_Series_Table", "Table", "pd.Series representation of a data table"), 19 | ("list_of_dict", "Table", "list of dictionaries with primitive entries"), 20 | ("polars_eager_table", "Table", "polars.DataFrame representation of a data table"), 21 | ("polars_lazy_table", "Table", "polars.LazyFrame representation of a data table"), 22 | ] 23 | 24 | MTYPE_LIST_TABLE = pd.DataFrame(MTYPE_REGISTER_TABLE)[0].values 25 | -------------------------------------------------------------------------------- /docs/source/user_guide.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _user_guide: 3 | 4 | ========== 5 | User Guide 6 | ========== 7 | 8 | Welcome to skpro's user guide! 9 | 10 | The user guide consists of introductory notebooks, ordered by learning task and object type. 11 | 12 | For guided tutorials with videos, see our :ref:`tutorials` page. 13 | 14 | To run the user guide notebooks interactively, you can 15 | `launch them on binder `_ 16 | without having to install anything. 17 | 18 | We assume basic familiarity with `scikit-learn`_. If you haven't worked with scikit-learn before, check out their 19 | `getting-started guide`_. 20 | 21 | The notebook files can be found `here `_. 22 | 23 | .. _scikit-learn: https://scikit-learn.org/stable/ 24 | .. _getting-started guide: https://scikit-learn.org/stable/getting_started.html 25 | 26 | .. 
@pytest.mark.skipif(
    not run_test_module_changed("skpro.distributions"),
    reason="run only if skpro.distributions has been changed",
)
@pytest.mark.parametrize("params", Hurdle.get_test_params())
def test_hurdle_less_than_zero(params):
    """Test Hurdle evaluated at a negative value.

    At -1.0, cdf, pdf and pmf are expected to be 0 everywhere, and
    log_pdf and log_pmf are expected to be -inf everywhere.
    """
    hurdle = Hurdle(**params)

    negative_value = -1.0

    checks = [
        (hurdle.cdf, 0.0),
        (hurdle.pdf, 0.0),
        (hurdle.pmf, 0.0),
        (hurdle.log_pdf, -np.inf),
        (hurdle.log_pmf, -np.inf),
    ]

    for method, expected_value in checks:
        result = np.asarray(method(negative_value))
        assert np.all(result == expected_value)
def test_tag_register_type():
    """Check the structure of the tag register. See _tags for specs.

    The register must be a list of 4-tuples
    ``(name, scitype, value type, description)``, where

    * name and description are str
    * scitype is a str or a list of str
    * value type is a str, or a pair ``(str, str or list of str)``
    """
    assert isinstance(OBJECT_TAG_REGISTER, list)
    assert all(isinstance(entry, tuple) for entry in OBJECT_TAG_REGISTER)

    for entry in OBJECT_TAG_REGISTER:
        assert len(entry) == 4
        tag_name, scitype, value_type, description = entry

        assert isinstance(tag_name, str)

        assert isinstance(scitype, (str, list))
        if isinstance(scitype, list):
            assert all(isinstance(item, str) for item in scitype)

        assert isinstance(value_type, (str, tuple))
        if isinstance(value_type, tuple):
            assert len(value_type) == 2
            kind, allowed = value_type
            assert isinstance(kind, str)
            assert isinstance(allowed, (list, str))
            if isinstance(allowed, list):
                assert all(isinstance(item, str) for item in allowed)

        assert isinstance(description, str)
class _CommonTags:
    """Mixin carrying the tag and config defaults shared by the base classes."""

    # default tag values, identical for every class that mixes this in
    _tags = dict(
        estimator_type="estimator",
        authors="skpro developers",
        maintainers="skpro developers",
    )

    # config common to all estimators
    _config = {}

    @property
    def name(self):
        """Return the class name of the object or estimator."""
        return type(self).__name__
def _is_nullable_numeric(dtype):
    """Return whether dtype equals one of "Int64", "Float64" or "boolean"."""
    nullable_names = ("Int64", "Float64", "boolean")
    return dtype in nullable_names


def _coerce_df_dtypes(obj):
    """Coerce pandas objects to non-nullable column types.

    The input `obj` is never mutated; whenever a coercion takes place,
    the result of `astype` is returned instead.

    Parameters
    ----------
    obj: pandas Series or DataFrame, or any object

    Returns
    -------
    obj unchanged, if obj is not pandas Series or DataFrame,
        or if it has no nullable numeric dtype
    if obj is pandas Series or DataFrame with nullable numeric dtypes,
        the result of casting those columns to float via astype
    """
    if isinstance(obj, pd.DataFrame):
        float_casts = {
            col: "float"
            for col in obj.columns
            if _is_nullable_numeric(obj.dtypes[col])
        }
        # only call astype if there is something to cast
        if float_casts:
            return obj.astype(float_casts)
        return obj

    if isinstance(obj, pd.Series) and _is_nullable_numeric(obj.dtype):
        return obj.astype("float")

    # non-pandas objects and non-nullable Series pass through untouched
    return obj
def _coerce_to_scalar(obj):
    """Coerce obj to scalar, from polymorphic input scalar or pandas.

    Parameters
    ----------
    obj : pd.DataFrame, pd.Series, or any other object
        DataFrame input must have exactly one row and one column,
        Series input must have exactly one entry.

    Returns
    -------
    the single entry of ``obj`` if it is a DataFrame or Series,
    otherwise ``obj`` itself, unchanged

    Raises
    ------
    AssertionError
        if a DataFrame or Series is passed that has more or less than one entry
    """
    if isinstance(obj, pd.DataFrame):
        assert len(obj) == 1
        assert len(obj.columns) == 1
        return obj.iloc[0, 0]
    if isinstance(obj, pd.Series):
        assert len(obj) == 1
        return obj.iloc[0]
    return obj


def _coerce_to_df(obj):
    """Coerce to pd.DataFrame, from polymorphic input scalar or pandas.

    Parameters
    ----------
    obj : scalar, pd.Series, pd.DataFrame, or other DataFrame-coercible object

    Returns
    -------
    pd.DataFrame
        a 1x1 DataFrame holding ``obj`` if ``obj`` is a scalar,
        otherwise ``pd.DataFrame(obj)``
    """
    # pd.DataFrame(scalar) raises ValueError, so scalars are wrapped in a list;
    # None is excluded so that it keeps producing an empty DataFrame as before
    if obj is not None and pd.api.types.is_scalar(obj):
        return pd.DataFrame([obj])
    return pd.DataFrame(obj)


def _coerce_to_series(obj):
    """Coerce to pd.Series, from polymorphic input scalar or pandas.

    Parameters
    ----------
    obj : scalar, pd.Series, single-column pd.DataFrame, or Series-coercible

    Returns
    -------
    pd.Series
        the only column of ``obj`` if it is a DataFrame, ``obj`` itself if it
        already is a Series, otherwise ``pd.Series(obj)``

    Raises
    ------
    AssertionError
        if a DataFrame with more or less than one column is passed
    """
    if isinstance(obj, pd.DataFrame):
        assert len(obj.columns) == 1
        return obj.iloc[:, 0]
    elif isinstance(obj, pd.Series):
        return obj
    else:
        return pd.Series(obj)


def _coerce_to_1d_numpy(obj):
    """Coerce to 1D np.ndarray, from pd.DataFrame or pd.Series.

    Objects that are not pandas are expected to provide ``flatten`` themselves,
    as ``flatten`` is called on them directly.
    """
    if isinstance(obj, (pd.DataFrame, pd.Series)):
        obj = obj.values
    return obj.flatten()
def pytest_addoption(parser):
    """Register the ``--only_changed_modules`` command line option."""
    option_settings = {
        "default": False,
        "help": "test only estimators from modules that have changed compared to main",
    }
    parser.addoption("--only_changed_modules", **option_settings)


def pytest_configure(config):
    """Turn on differential testing if ``--only_changed_modules`` is true.

    The option value is accepted both as boolean ``True`` and as the string
    ``"True"``; in either case ``ONLY_CHANGED_MODULES`` is set on the
    ``skpro.tests._config`` module.
    """
    from skpro.tests import _config

    requested = config.getoption("--only_changed_modules")
    if requested not in (True, "True"):
        return

    _config.ONLY_CHANGED_MODULES = True
ignore:numpy.dtype size changed 25 | ignore:numpy.ufunc size changed 26 | 27 | [flake8] 28 | # Default flake8 3.5 ignored flags 29 | ignore = E121, E123, E126, E226, E24, E704, W503, W504 30 | # inline with Black code formatter 31 | max-line-length = 88 32 | exclude = 33 | skpro/_contrib/* 34 | extend-ignore = 35 | # See https://github.com/PyCQA/pycodestyle/issues/373 36 | E203 37 | 38 | [metadata] 39 | description_file = README.md 40 | long_description_content_type = text/markdown 41 | 42 | [check-manifest] 43 | ignore = 44 | .binder/** 45 | .all-contributorsrc 46 | .coveragerc 47 | examples/** 48 | build_tools/** 49 | __check_build/** 50 | docs/** 51 | Makefile 52 | CODEOWNERS 53 | CONTRIBUTING.md 54 | *.yaml 55 | *.yml 56 | 57 | [isort] 58 | profile = black 59 | 60 | [pydocstyle] 61 | convention = numpy 62 | match = (?!test_).*\.py 63 | 64 | [bdist_wheel] 65 | universal = false 66 | 67 | [sdist] 68 | formats = gztar 69 | -------------------------------------------------------------------------------- /skpro/utils/random_state.py: -------------------------------------------------------------------------------- 1 | """Utilities for handling the random_state variable.""" 2 | # copied from scikit-learn to avoid dependency on sklearn private methods 3 | 4 | import numpy as np 5 | from sklearn.utils import check_random_state 6 | 7 | 8 | def set_random_state(estimator, random_state=0): 9 | """Set fixed random_state parameters for an estimator. 10 | 11 | Finds all parameters ending ``random_state`` and sets them to integers 12 | derived from ``random_state``. 13 | 14 | Parameters 15 | ---------- 16 | estimator : estimator supporting get_params, set_params 17 | Estimator with potential randomness managed by random_state parameters. 18 | 19 | random_state : int, RandomState instance or None, default=None 20 | Pseudo-random number generator to control the generation of the random 21 | integers. Pass an int for reproducible output across multiple function calls. 
def _metadata_requested(return_metadata):
    """Return whether some metadata has been requested.

    A bool is passed through as-is; any non-bool value counts as a request.
    """
    if isinstance(return_metadata, bool):
        return return_metadata
    return True


def _ret(valid, msg, metadata, return_metadata):
    """Return switch for checker functions.

    Returns the triple ``(valid, msg, metadata)`` if metadata was requested,
    otherwise only ``valid``.
    """
    wants_metadata = _metadata_requested(return_metadata)
    return (valid, msg, metadata) if wants_metadata else valid


def _req(key, return_metadata):
    """Return whether metadata key is requested, boolean.

    ``key`` and ``return_metadata`` may each be a single value or a list;
    with lists on both sides, one common element is enough.
    A bool ``return_metadata`` is returned unchanged, and any
    ``return_metadata`` that is neither bool, str nor list yields ``False``.
    """
    if isinstance(return_metadata, bool):
        return return_metadata

    key_is_list = isinstance(key, list)

    if isinstance(return_metadata, str):
        if key_is_list:
            return return_metadata in key
        return return_metadata == key

    if isinstance(return_metadata, list):
        if key_is_list:
            return len(set(key).intersection(return_metadata)) > 0
        return key in return_metadata

    return False


def _wr(d, key, val, return_metadata):
    """Metadata write switch for checker functions.

    Writes ``val`` to ``d[key]`` only if ``key`` is requested, and returns ``d``.
    """
    if not _req(key, return_metadata):
        return d

    d[key] = val
    return d
@pytest.mark.parametrize("coerce_to_list", [True, False])
def test_scitype(coerce_to_list):
    """Test that the scitype function recovers the correct scitype(s)."""
    from skpro.distributions.laplace import Laplace
    from skpro.regression.mapie import MapieRegressor
    from skpro.regression.residual import ResidualDouble

    def _assert_scitype(obj, expected):
        """Infer the scitype of obj and compare it with the expected string."""
        inferred = scitype(obj, coerce_to_list=coerce_to_list)
        if not coerce_to_list:
            assert expected == inferred
            return
        # with coercion, the result is a list with the scitype as first entry
        assert isinstance(inferred, list)
        assert expected == inferred[0]

    # test that scitype works for classes with soft dependencies
    _assert_scitype(MapieRegressor, "regressor_proba")

    # test that scitype works for instances
    _assert_scitype(ResidualDouble.create_test_instance(), "regressor_proba")

    # test distribution object
    _assert_scitype(Laplace, "distribution")
searching on GitHub under [Issues](https://github.com/sktime/skpro/issues). 6 | 7 | * If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/sktime/skpro/issues/new). Be sure to include a **title and clear description**, as much relevant information as possible, and a **code sample** or an **executable test case** demonstrating the expected behavior that is not occurring. 8 | 9 | * Please follow the further discussion in case more information is needed or questions arise. 10 | 11 | #### **Did you write a patch that fixes a bug?** 12 | 13 | * Open a new GitHub pull request with the patch. 14 | 15 | * Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable. 16 | 17 | #### **Do you intend to add a new feature or change an existing one?** 18 | 19 | * Suggest your change in an issue and offer to implement the feature. 20 | 21 | * Wait for positive feedback in order to avoid double work (maybe your idea is already in development). 22 | 23 | * Implement and send a PR. 24 | 25 | #### **Do you have questions about the source code?** 26 | 27 | * Ask any question about how to use skpro using the forum. 28 | 29 | #### **Do you want to contribute to the skpro documentation?** 30 | 31 | * Please send a PR that proposes changes to the docs directory. 32 | 33 | skpro is a team effort. We encourage you to pitch in and join us! 34 | 35 | Thanks! :heart: :heart: :heart: 36 | 37 | skpro Team 38 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017 - 2023, The skpro developers. 4 | 5 | All rights reserved. 
6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class BaseTable(BaseDatatype):
    """Base class for Table data types.

    All constructor arguments are metadata fields that are stored on the
    instance unchanged. Parameters are inferred by check.

    Parameters
    ----------
    is_univariate: bool
        True iff table has one variable
    is_empty: bool
        True iff table has no variables or no instances
    has_nans: bool
        True iff the table contains NaN values
    n_instances: int
        number of instances/rows in the table
    n_features: int
        number of variables in table
    feature_names: list of int or object
        names of variables in table
    """

    _tags = dict(
        scitype="Table",
        name=None,  # any string
        name_python=None,  # lower_snake_case
        name_aliases=[],
        python_version=None,
        python_dependencies=None,
    )

    def __init__(
        self,
        is_univariate=None,
        is_empty=None,
        has_nans=None,
        n_instances=None,
        n_features=None,
        feature_names=None,
    ):
        metadata_fields = {
            "is_univariate": is_univariate,
            "is_empty": is_empty,
            "has_nans": has_nans,
            "n_instances": n_instances,
            "n_features": n_features,
            "feature_names": feature_names,
        }
        # store every metadata field under its own name before base class init
        for field_name, field_value in metadata_fields.items():
            setattr(self, field_name, field_value)

        super().__init__()
Reproduce** 16 | 21 | 22 | 23 | **Expected behavior** 24 | 27 | 28 | 29 | **Environment** 30 | 35 | 36 | 37 | **Additional context** 38 | 41 | -------------------------------------------------------------------------------- /skpro/distributions/adapters/scipy/_empirical.py: -------------------------------------------------------------------------------- 1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 2 | """Empirical distribution.""" 3 | 4 | __author__ = ["fkiraly"] 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | def empirical_from_discrete(dist, index=None, columns=None): 11 | """Convert a list of scipy discrete distributions to an skpro Empirical object. 12 | 13 | Parameters 14 | ---------- 15 | dist : list of rv_discrete 16 | List of scipy discrete distributions, instances of rv_discrete. 17 | index : pd.Index or coercible, optional 18 | Index of the resulting empirical distribution. 19 | Must be the same length as dist. 20 | columns : pd.Index or coercible, optional 21 | Columns of the resulting empirical distribution. 22 | Must be of length 1. 
def not_existing(f):
    """Decorate an interface method to declare it theoretically non existent.

    Parameters
    ----------
    f : callable
        Method to decorate

    Returns
    -------
    callable
        The same object ``f``, with attribute ``not_existing`` set to ``True``
    """
    setattr(f, "not_existing", True)
    return f


def ensure_existence(f):
    """Ensure that a method is not marked as non existent.

    Parameters
    ----------
    f : callable
        Method to check

    Raises
    ------
    NotImplementedError
        if the method is marked as non existent, i.e., carries a
        truthy ``not_existing`` attribute

    Returns
    -------
    callable
        Method f
    """
    marked_missing = getattr(f, "not_existing", False)
    if not marked_missing:
        return f

    raise NotImplementedError(
        "The distribution has no " + f.__name__ + " function. "
        "You may use an adapter that supports its approximation."
    )


def to_percent(value, return_float=True):
    """Convert a value into a percent representation.

    Integers are read as whole percentages and divided by 100 first;
    values that are zero or negative are clipped to zero.

    Parameters
    ----------
    value : int or float
        Number representing a percentage
    return_float : bool, default=True
        If True, the fraction is returned as float,
        otherwise the fraction times 100 is returned, rounded to an int

    Returns
    -------
    int or float
        A percentage
    """
    if isinstance(value, int):
        value = float(value) / 100.0

    fraction = 0 if value <= 0 else value

    if return_float:
        return float(fraction)
    return int(round(fraction * 100))
class _DelegateWithFittedParamForwarding(_DelegatedProbaRegressor):
    """Common base class for delegates with attribute forwarding.

    Assumes that delegate has an attribute `estimator_`,
    from which fitted attributes are forwarded to self.
    The names of the forwarded attributes are read from
    `FITTED_PARAMS_TO_FORWARD`.
    """

    # attribute for _DelegatedProbaRegressor, which then delegates
    # all non-overridden methods are same as of getattr(self, _delegate_name)
    # see further details in _DelegatedRegressor docstring
    # _estimator, not estimator_, because we do not want to expose it as
    # fitted params - fitted params are instead forwarded
    _delegate_name = "_estimator"

    def _fit(self, X, y):
        """Fit regressor to training data.

        Fits the delegate, then copies the attributes listed in
        `FITTED_PARAMS_TO_FORWARD` from the delegate's `estimator_` to self.

        Writes to self:
            Sets fitted model attributes ending in "_".

        Parameters
        ----------
        X : pandas DataFrame
            feature instances to fit regressor to
        y : pandas DataFrame, must be same length as X
            labels to fit regressor to

        Returns
        -------
        self : reference to self
        """
        delegate = self._get_delegate()
        delegate.fit(X=X, y=y)

        # forward fitted attributes of the inner estimator to self, one by one
        for param_name in self.FITTED_PARAMS_TO_FORWARD:
            fitted_value = getattr(delegate.estimator_, param_name)
            setattr(self, param_name, fitted_value)

        return self
"41898282+github-actions[bot]@users.noreply.github.com" 33 | if ! git diff --quiet CONTRIBUTORS.md; then 34 | BRANCH="update-contributors-$(date +%F)" 35 | git checkout -b "$BRANCH" 36 | git add CONTRIBUTORS.md 37 | git commit -m "[MNT] all-contributors update" 38 | git push --set-upstream origin "$BRANCH" 39 | # create PR using gh (you can replace with actions/create-pull-request if preferred) 40 | gh auth login --with-token <<< "$GITHUB_TOKEN" 41 | gh pr create --title "[MNT] all-contributors update" \ 42 | --body "Automated update to CONTRIBUTORS.md generated by workflow run ${{ github.run_id }}." \ 43 | --head "$BRANCH" --base main 44 | else 45 | echo "No changes to CONTRIBUTORS.md" 46 | fi 47 | -------------------------------------------------------------------------------- /docs/source/contribute.rst: -------------------------------------------------------------------------------- 1 | .. _contrib_guide: 2 | 3 | ============ 4 | Get Involved 5 | ============ 6 | 7 | ``skpro`` is a community-driven project and your help is extremely welcome. If you 8 | get stuck, please don't hesitate to chat with us or raise an issue. 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | :hidden: 13 | 14 | developer_guide 15 | contribute/team 16 | contribute/code_of_conduct 17 | 18 | .. grid:: 1 2 2 2 19 | :gutter: 3 20 | 21 | .. grid-item-card:: How to Contribute 22 | :text-align: center 23 | 24 | New to ``skpro``? Learn how you can contribute. 25 | 26 | +++ 27 | 28 | .. button-ref:: contribute/how_to_contribute 29 | :color: primary 30 | :click-parent: 31 | :expand: 32 | 33 | Contribute 34 | 35 | .. grid-item-card:: Developer guide 36 | :text-align: center 37 | 38 | Help develop ``skpro``. 39 | 40 | +++ 41 | 42 | .. button-ref:: developer_guide 43 | :color: primary 44 | :click-parent: 45 | :expand: 46 | 47 | Development 48 | 49 | .. grid-item-card:: Development Team 50 | :text-align: center 51 | 52 | Meet ``skpro``'s core development team. 53 | 54 | +++ 55 | 56 | .. 
button-ref:: contribute/team 57 | :color: primary 58 | :click-parent: 59 | :expand: 60 | 61 | Development Team 62 | 63 | .. grid-item-card:: Code of Conduct 64 | :text-align: center 65 | 66 | Understand our code of conduct. 67 | 68 | +++ 69 | 70 | .. button-ref:: contribute/code_of_conduct 71 | :color: primary 72 | :click-parent: 73 | :expand: 74 | 75 | Code of Conduct 76 | -------------------------------------------------------------------------------- /docs/source/users.rst: -------------------------------------------------------------------------------- 1 | .. _user_documentation: 2 | 3 | ============= 4 | Documentation 5 | ============= 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | :hidden: 10 | 11 | installation 12 | tutorials 13 | user_guide 14 | changelog 15 | related_software 16 | 17 | 18 | .. grid:: 1 2 2 2 19 | :gutter: 3 20 | 21 | .. grid-item-card:: Installation 22 | :text-align: center 23 | 24 | Install ``skpro``. 25 | 26 | +++ 27 | 28 | .. button-ref:: installation 29 | :color: primary 30 | :click-parent: 31 | :expand: 32 | 33 | Installation 34 | 35 | .. grid-item-card:: Tutorials 36 | :text-align: center 37 | 38 | Introductory Tutorials. 39 | 40 | +++ 41 | 42 | .. button-ref:: tutorials 43 | :color: primary 44 | :click-parent: 45 | :expand: 46 | 47 | Tutorials 48 | 49 | .. grid-item-card:: User Guide 50 | :text-align: center 51 | 52 | Learn about using ``skpro``. 53 | 54 | +++ 55 | 56 | .. button-ref:: user_guide 57 | :color: primary 58 | :click-parent: 59 | :expand: 60 | 61 | User Guide 62 | 63 | .. grid-item-card:: Changelog 64 | :text-align: center 65 | 66 | Information for developers. 67 | 68 | +++ 69 | 70 | .. button-ref:: changelog 71 | :color: primary 72 | :click-parent: 73 | :expand: 74 | 75 | Changelog 76 | 77 | .. grid-item-card:: Related Software 78 | :text-align: center 79 | 80 | Explore software related to ``skpro``. 81 | 82 | +++ 83 | 84 | .. 
``skpro`` follows `scikit-learn <https://scikit-learn.org/stable/index.html>`_\ 's API whenever possible.
If you're new to scikit-learn, take a look at their `getting-started guide <https://scikit-learn.org/stable/getting_started.html>`_.
If you're already familiar with scikit-learn, you may still learn something new from their `developers' guide <https://scikit-learn.org/stable/developers/index.html>`_.
def test_show_versions_runs():
    """Check that ``show_versions`` executes cleanly and returns nothing."""
    # the utility only prints, so the expected return value is None
    result = show_versions()
    assert result is None
def test_deps_info_deps_missing_package_present_directory():
    """Test that _get_deps_info does not fail if a dependency is missing.

    A directory with a random, unused package name is created relative to
    the current working directory, so that a folder of that name exists
    while no such package is installed. ``_get_deps_info`` is then expected
    to report ``None`` as the version for that name.
    """
    dummy_package_name = uuid.uuid4().hex

    dummy_folder_path = pathlib.Path(dummy_package_name)
    dummy_folder_path.mkdir()

    # clean up in ``finally`` so the directory is removed even if the
    # assertion fails or ``_get_deps_info`` raises
    try:
        assert _get_deps_info([dummy_package_name]) == {dummy_package_name: None}
    finally:
        dummy_folder_path.rmdir()
@pytest.mark.skipif(
    not _check_soft_dependencies(["ondil"], severity="none"),
    reason="skip test if ondil is not installed in environment",
)
def test_ondil_instantiation_and_get_test_params():
    """Smoke-test construction of the Ondil wrapper from its test params.

    Skipped when the optional dependency ``ondil`` is unavailable. Checks
    that ``get_test_params`` yields at least one parameter set, and that
    the first set can be used to construct an ``OndilOnlineGamlss``.
    """
    # second line of defence: skip if ondil cannot actually be imported
    pytest.importorskip("ondil")

    param_sets = OndilOnlineGamlss.get_test_params()
    # a single dict is treated as a one-element list of parameter sets
    param_sets = [param_sets] if isinstance(param_sets, dict) else param_sets
    assert len(param_sets) >= 1

    first_params = param_sets[0]
    estimator = OndilOnlineGamlss(**first_params)
    assert isinstance(estimator, OndilOnlineGamlss)
If you have suggestions, open an 13 | issue or Pull Request. 14 | 15 | 16 | Triage 17 | ====== 18 | 19 | * Assign relevant labels 20 | * Assign to relevant project board 21 | * Title: Is it using the 3-letter codes? Is it understandable? 22 | * Description: Is it understandable? Any related issues/PRs? 23 | * CI checks: approval for first-time contributors, any help needed with 24 | code/doc quality checks? 25 | * Merge conflicts 26 | 27 | Code Review 28 | =========== 29 | 30 | * Unit testing: 31 | 32 | - Are the code changes tested? 33 | - Are the tests understandable? 34 | - Are all changes covered by tests? We usually aim for a test coverage of 35 | at least 90%. 36 | - Code coverage will be reported as part of the automated CI checks on GitHub 37 | 38 | * Test changes locally: Does everything work as expected? 39 | * Deprecation warnings: 40 | 41 | - Has the public API changed? 42 | - Have deprecation warnings been added before making the changes? 43 | 44 | .. _reviewer_guide_doc: 45 | 46 | Documentation Review 47 | ==================== 48 | 49 | * Are the docstrings complete and understandable to users? 50 | * Do they follow the NumPy format and ``skbase`` conventions? 51 | * If the same parameter, attribute, return object or error is included elsewhere 52 | in ``skpro`` are the docstring descriptions as similar as possible? 53 | * Does the online documentation render correctly with the changes? 54 | * Do the docstrings contain links to the relevant topics in the 55 | :ref:`glossary` or :ref:`user_guide`? 56 | 57 | .. note:: 58 | 59 | If a Pull Request does not meet ``skpro``'s 60 | :ref:`documentation guide ` a reviewer should 61 | require the documentation be updated prior to approving the Pull Request. 62 | -------------------------------------------------------------------------------- /docs/source/api_reference/metrics.rst: -------------------------------------------------------------------------------- 1 | 2 | .. 
Valid tags can be listed using ``skpro.registry.all_tags``.
"""Auxiliary script to test an estimator in its own virtual machine."""
def run_doctest(
    f,
    verbose=False,
    name=None,
    compileflags=None,
    optionflags=doctest.ELLIPSIS,
    raise_on_error=True,
):
    """Run doctests for a given function or class, and return or raise.

    Parameters
    ----------
    f : callable
        Function or class to run doctests for.
    verbose : bool, optional (default=False)
        If True, print the results of the doctests.
        Failure detection is based on the doctest output being non-empty,
        so verbose output also counts as a failure if ``raise_on_error=True``.
    name : str, optional (default=f.__name__, if available, otherwise "NoName")
        Name of the function or class.
    compileflags : int, optional (default=None)
        Flags to pass to the Python parser.
    optionflags : int, optional (default=doctest.ELLIPSIS)
        Flags to control the behaviour of the doctest.
    raise_on_error : bool, optional (default=True)
        If True, raise an exception if the doctests fail.

    Returns
    -------
    doctest_output : str
        Output of the doctests.

    Raises
    ------
    RuntimeError
        If raise_on_error=True and the doctests fail.
    """
    # resolve the default name before running the doctests, so that the
    # documented "NoName" fallback also applies to the doctest call itself,
    # which otherwise rejects objects that have no ``__name__``
    if name is None:
        name = getattr(f, "__name__", "NoName")

    # doctest reports via stdout, so capture stdout while it runs
    doctest_output_io = io.StringIO()
    with contextlib.redirect_stdout(doctest_output_io):
        doctest.run_docstring_examples(
            f=f,
            globs=globals(),
            verbose=verbose,
            name=name,
            compileflags=compileflags,
            optionflags=optionflags,
        )
    doctest_output = doctest_output_io.getvalue()

    # any captured output is treated as a doctest failure
    if raise_on_error and doctest_output:
        raise RuntimeError(
            f"Docstring examples failed doctests "
            f"for {name}, doctest output: {doctest_output}"
        )
    return doctest_output
clean: ## Clean build dist and egg directories left after install
	rm -rf ./dist
	rm -rf ./build
	# pytest writes its cache to .pytest_cache by default;
	# the un-dotted name is kept for backward compatibility
	rm -rf ./pytest_cache ./.pytest_cache
	rm -rf ./htmlcov
	rm -rf ./junit
	rm -rf ./$(PACKAGE).egg-info
	rm -rf coverage.xml
	rm -f MANIFEST
	rm -rf ./wheelhouse/*
	find . -type f -iname "*.so" -delete
	find . -type f -iname '*.pyc' -delete
	find . -type d -name '__pycache__' -empty -delete
@pytest.mark.skipif(
    not run_test_for_class(GLMRegressor),
    reason="run test only if softdeps are present and incrementally (if requested)",
)
def test_glm_with_offset_exposure():
    """Check that GLM fits and predicts with offset and exposure columns set."""
    import numpy as np
    from sklearn.datasets import load_diabetes
    from sklearn.model_selection import train_test_split

    n_rows = 200

    features, target = load_diabetes(return_X_y=True, as_frame=True)
    target = pd.DataFrame(target)
    features = features.iloc[:n_rows]
    target = target.iloc[:n_rows]

    # constant offset column, and an exposure column counting up from 1
    features["off"] = np.ones(features.shape[0]) * 2.1
    features["exp"] = np.arange(1, features.shape[0] + 1)

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, random_state=0
    )

    # offset is given by column name, exposure as -1
    # NOTE(review): -1 presumably selects the last column, "exp" - confirm
    regressor = GLMRegressor(
        family="Normal", link="Log", offset_var="off", exposure_var=-1
    )
    regressor.fit(X_train, y_train)
    point_pred = regressor.predict(X_test)
    proba_pred = regressor.predict_proba(X_test)

    expected_shape = y_test.shape
    assert point_pred.shape == expected_shape
    assert proba_pred.shape == expected_shape
@pytest.mark.skipif(
    not run_test_for_class(QPD_B),
    reason="run test only if softdeps are present and incrementally (if requested)",
)
def test_qpd_b_simple_use():
    """Smoke test: build a bounded QPD and evaluate its mean."""
    bounded_params = {
        "alpha": 0.2,
        "qv_low": [1, 2],
        "qv_median": [3, 4],
        "qv_high": [5, 6],
        "lower": 0,
        "upper": 10,
    }
    distr = QPD_B(**bounded_params)

    # only checks that the call runs, the value is not inspected
    distr.mean()
class NegativeBinomial(_ScipyAdapter):
    r"""Negative binomial distribution.

    Most methods wrap ``scipy.stats.nbinom``.

    The distribution is parameterized by mean :math:`\mu` and dispersion
    :math:`\alpha`. These are mapped to the ``scipy`` parameters as
    :math:`n = \alpha` and :math:`p = \alpha / (\alpha + \mu)`.
    Under this mapping, the mean is :math:`\mu` and the variance is
    :math:`\mu + \mu^2 / \alpha`.

    Parameters
    ----------
    mu : ArrayLike
        mean of the distribution.
    alpha : ArrayLike
        dispersion of distribution.
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions import NegativeBinomial

    >>> distr = NegativeBinomial(mu=1.0, alpha=1.0)
    """

    _tags = {
        "capabilities:approx": ["energy"],
        "capabilities:exact": ["mean", "var", "pmf", "log_pmf", "cdf", "ppf"],
        "distr:measuretype": "discrete",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, mu: ArrayLike, alpha: ArrayLike, index=None, columns=None):
        self.mu = mu
        self.alpha = alpha

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_discrete:
        """Return the scipy distribution object that is wrapped."""
        return nbinom

    def _get_scipy_param(self) -> tuple:
        """Return positional and keyword arguments for the scipy object.

        Returns
        -------
        args : list
            ``[n, p]``, with ``n = alpha`` and ``p = alpha / (alpha + mu)``,
            computed from the broadcast parameters in ``self._bc_params``
        kwargs : dict
            empty, all parameters are passed positionally
        """
        mu = self._bc_params["mu"]
        alpha = self._bc_params["alpha"]

        # translate (mu, alpha) to scipy's (n, p) parameterization
        n = alpha
        p = alpha / (alpha + mu)

        return [n, p], {}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # 2D array-like mean with scalar dispersion
        params1 = {"mu": [[1, 1], [2, 3], [4, 5]], "alpha": 2.0}
        # scalar parameters with explicit index and columns
        params2 = {
            "mu": 1.0,
            "alpha": 2.0,
            "index": pd.Index([1, 2, 5]),
            "columns": pd.Index(["a", "b"]),
        }
        return [params1, params2]
def scitype(obj, force_single_scitype=True, coerce_to_list=False):
    """Determine scitype string of obj.

    The scitype is read from the ``"object_type"`` tag of ``obj``:
    via ``get_class_tag`` if ``obj`` is a class, via ``get_tag`` otherwise.
    If ``obj`` has no ``get_tag`` attribute, or the tag value is ``None``,
    the scitype ``"object"`` is used.

    Parameters
    ----------
    obj : class or object
        class or object to determine the scitype of
    force_single_scitype : bool, optional, default = True
        whether only a single scitype is returned
        if True, and the tag value is a list, only the *first* element is used
    coerce_to_list : bool, optional, default = False
        whether return should be coerced to list, even if only one scitype is identified

    Returns
    -------
    scitype : str, or list of str
        str, if exactly one scitype is determined for obj
        or force_single_scitype is True, and if coerce_to_list is False
        list of str, if more than one scitype is determined,
        or if coerce_to_list is True

    Raises
    ------
    TypeError if no scitype can be determined for obj,
        i.e., the ``"object_type"`` tag is an empty list
    """
    # fallback for all cases in which no "object_type" tag can be read;
    # set up front so that ``scitypes`` is defined on every code path
    scitypes = ["object"]

    # if object has tag, use tag
    if hasattr(obj, "get_tag"):
        if isclass(obj):
            tag_type = obj.get_class_tag("object_type", None)
        else:
            tag_type = obj.get_tag("object_type", None, raise_error=False)

        if tag_type is not None:
            if coerce_to_list and not isinstance(tag_type, list):
                scitypes = [tag_type]
            else:
                scitypes = tag_type

    if isinstance(scitypes, list) and len(scitypes) == 0:
        raise TypeError("Error, no scitype could be determined for obj")

    if isinstance(scitypes, list) and force_single_scitype:
        scitypes = [scitypes[0]]

    # unwrap single-element lists unless a list was explicitly requested
    if isinstance(scitypes, list) and len(scitypes) == 1 and not coerce_to_list:
        return scitypes[0]

    return scitypes
-------------------------------------------------------------------------------- 1 | .. _top_level_about: 2 | 3 | ===== 4 | About 5 | ===== 6 | 7 | Learn more about the ``skpro`` project and its community. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | :hidden: 12 | 13 | about/mission 14 | about/history 15 | about/team 16 | about/contributors 17 | about/governance 18 | about/roadmap 19 | 20 | .. grid:: 1 2 2 2 21 | :gutter: 3 22 | 23 | .. grid-item-card:: Mission 24 | :text-align: center 25 | 26 | ``skpro``'s mission. 27 | 28 | +++ 29 | 30 | .. button-ref:: about/mission 31 | :color: primary 32 | :click-parent: 33 | :expand: 34 | 35 | Mission 36 | 37 | .. grid-item-card:: History 38 | :text-align: center 39 | 40 | Learn about ``skpro``'s history. 41 | 42 | +++ 43 | 44 | .. button-ref:: about/history 45 | :color: primary 46 | :click-parent: 47 | :expand: 48 | 49 | History 50 | 51 | .. grid-item-card:: Development Team 52 | :text-align: center 53 | 54 | ``skpro``'s core development team. 55 | 56 | +++ 57 | 58 | .. button-ref:: about/team 59 | :color: primary 60 | :click-parent: 61 | :expand: 62 | 63 | Development Team 64 | 65 | .. grid-item-card:: Contributors 66 | :text-align: center 67 | 68 | All of ``skpro``'s contributors. 69 | 70 | +++ 71 | 72 | .. button-ref:: about/contributors 73 | :color: primary 74 | :click-parent: 75 | :expand: 76 | 77 | Contributors 78 | 79 | .. grid-item-card:: Governance 80 | :text-align: center 81 | 82 | How we govern the project. 83 | 84 | +++ 85 | 86 | .. button-ref:: about/governance 87 | :color: primary 88 | :click-parent: 89 | :expand: 90 | 91 | Governance 92 | 93 | .. grid-item-card:: Roadmap 94 | :text-align: center 95 | 96 | Where we plan to take ``skpro``. 97 | 98 | +++ 99 | 100 | .. 
button-ref:: about/roadmap 101 | :color: primary 102 | :click-parent: 103 | :expand: 104 | 105 | Roadmap 106 | -------------------------------------------------------------------------------- /skpro/distributions/base/tests/test_multiindex.py: -------------------------------------------------------------------------------- 1 | """Test cases for the MultiIndex functionality of the BaseDistribution. 2 | 3 | Uses the Normal distribution, but is intended to trigger the base layer. 4 | """ 5 | 6 | import numpy as np 7 | import pandas as pd 8 | import pytest 9 | 10 | from skpro.distributions.normal import Normal 11 | 12 | 13 | @pytest.fixture 14 | def normal_dist(): 15 | ix = pd.MultiIndex.from_product([(1, 2), (2, 3)]) 16 | return Normal(np.array([[1, 2], [2, 3], [4, 5], [6, 7]]), 2, index=ix) 17 | 18 | 19 | def test_loc_partial_level(normal_dist): 20 | result = normal_dist.loc[1] 21 | expected_index = pd.MultiIndex.from_tuples([(1, 2), (1, 3)]) 22 | np.testing.assert_array_equal(result.index, expected_index) 23 | assert result.mean().shape == (2, 2) 24 | 25 | 26 | def test_loc_full_tuple(normal_dist): 27 | result = normal_dist.loc[(2, 2)] 28 | expected_index = pd.MultiIndex.from_tuples([(2, 2)]) 29 | np.testing.assert_array_equal(result.index, expected_index) 30 | assert result.mean().shape == (1, 2) 31 | 32 | 33 | def test_loc_list_of_keys(normal_dist): 34 | result = normal_dist.loc[[(1, 2), (2, 3)]] 35 | expected_index = pd.MultiIndex.from_tuples([(1, 2), (2, 3)]) 36 | np.testing.assert_array_equal(result.index, expected_index) 37 | assert result.mean().shape == (2, 2) 38 | 39 | 40 | def test_iloc_single_row(normal_dist): 41 | result = normal_dist.iloc[0] 42 | expected_index = pd.MultiIndex.from_tuples([(1, 2)]) 43 | np.testing.assert_array_equal(result.index, expected_index) 44 | assert result.mean().shape == (1, 2) 45 | 46 | 47 | def test_iloc_multiple_rows(normal_dist): 48 | result = normal_dist.iloc[[0, 3]] 49 | expected_index = pd.MultiIndex.from_tuples([(1, 2), 
(2, 3)]) 50 | np.testing.assert_array_equal(result.index, expected_index) 51 | assert result.mean().shape == (2, 2) 52 | 53 | 54 | def test_iloc_column_slice(normal_dist): 55 | result = normal_dist.iloc[:, 1] 56 | expected_index = normal_dist.index 57 | assert result.mean().shape == (4, 1) 58 | np.testing.assert_array_equal(result.index, expected_index) 59 | 60 | 61 | def test_loc_row_col(normal_dist): 62 | result = normal_dist.loc[(1, 2), :] 63 | expected_index = pd.MultiIndex.from_tuples([(1, 2)]) 64 | assert result.mean().shape == (1, 2) 65 | np.testing.assert_array_equal(result.index, expected_index) 66 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: check-added-large-files 6 | args: ["--maxkb=1000"] 7 | - id: check-case-conflict 8 | - id: check-merge-conflict 9 | - id: check-symlinks 10 | - id: check-yaml 11 | - id: debug-statements 12 | - id: end-of-file-fixer 13 | exclude: "^docs/source/examples/" 14 | - id: fix-encoding-pragma 15 | args: 16 | - --remove 17 | - id: requirements-txt-fixer 18 | - id: trailing-whitespace 19 | 20 | - repo: https://github.com/asottile/pyupgrade 21 | rev: v3.10.1 22 | hooks: 23 | - id: pyupgrade 24 | args: 25 | - --py38-plus 26 | 27 | - repo: https://github.com/pycqa/isort 28 | rev: 5.12.0 29 | hooks: 30 | - id: isort 31 | name: isort 32 | 33 | - repo: https://github.com/psf/black 34 | rev: 23.7.0 35 | hooks: 36 | - id: black 37 | language_version: python3 38 | # args: [--line-length 79] 39 | 40 | - repo: https://github.com/pycqa/flake8 41 | rev: 6.1.0 42 | hooks: 43 | - id: flake8 44 | exclude: docs/conf.py 45 | additional_dependencies: [flake8-bugbear, flake8-print] 46 | 47 | - repo: https://github.com/mgedmin/check-manifest 48 | rev: "0.49" 49 | hooks: 50 | - id: check-manifest 51 | 
stages: [manual] 52 | 53 | - repo: https://github.com/nbQA-dev/nbQA 54 | rev: 1.7.0 55 | hooks: 56 | - id: nbqa-black 57 | args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells] 58 | additional_dependencies: [black==22.3.0] 59 | - id: nbqa-isort 60 | args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells] 61 | additional_dependencies: [isort==5.6.4] 62 | - id: nbqa-flake8 63 | args: [--nbqa-dont-skip-bad-cells, "--extend-ignore=E402,E203"] 64 | additional_dependencies: [flake8==3.8.3] 65 | 66 | - repo: https://github.com/pycqa/pydocstyle 67 | rev: 6.3.0 68 | hooks: 69 | - id: pydocstyle 70 | args: ["--config=setup.cfg"] 71 | 72 | # We use the Python version instead of the original version which seems to require Docker 73 | # https://github.com/koalaman/shellcheck-precommit 74 | - repo: https://github.com/shellcheck-py/shellcheck-py 75 | rev: v0.9.0.5 76 | hooks: 77 | - id: shellcheck 78 | name: shellcheck 79 | -------------------------------------------------------------------------------- /skpro/distributions/binomial.py: -------------------------------------------------------------------------------- 1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 2 | """Binomial probability distribution.""" 3 | 4 | __author__ = ["meraldoantonio"] 5 | 6 | import pandas as pd 7 | from scipy.stats import binom, rv_discrete 8 | 9 | from skpro.distributions.adapters.scipy import _ScipyAdapter 10 | 11 | 12 | class Binomial(_ScipyAdapter): 13 | r"""Binomial distribution. 14 | 15 | Most methods wrap ``scipy.stats.binom``. 16 | The Binomial distribution is parameterized by the number of trials :math:`n` 17 | and the probability of success :math:`p`, 18 | such that the probability mass function (PMF) is given by: 19 | 20 | .. 
math:: P(X = k) = \binom{n}{k} p^k (1-p)^{n-k} 21 | 22 | Parameters 23 | ---------- 24 | n : int or array of int (1D or 2D), must be non-negative 25 | p : float or array of float (1D or 2D), must be in [0, 1] 26 | index : pd.Index, optional, default = RangeIndex 27 | columns : pd.Index, optional, default = RangeIndex 28 | 29 | Examples 30 | -------- 31 | >>> from skpro.distributions.binomial import Binomial 32 | 33 | >>> d = Binomial(n=[[10, 10], [20, 30], [40, 50]], p=0.5) 34 | """ 35 | 36 | _tags = { 37 | "capabilities:approx": ["pmf"], 38 | "capabilities:exact": ["mean", "var", "pmf", "log_pmf", "cdf", "ppf"], 39 | "distr:measuretype": "discrete", 40 | "distr:paramtype": "parametric", 41 | "broadcast_init": "on", 42 | } 43 | 44 | def __init__(self, n, p, index=None, columns=None): 45 | self.n = n 46 | self.p = p 47 | 48 | super().__init__(index=index, columns=columns) 49 | 50 | def _get_scipy_object(self) -> rv_discrete: 51 | return binom 52 | 53 | def _get_scipy_param(self): 54 | n = self._bc_params["n"] 55 | p = self._bc_params["p"] 56 | 57 | return [], {"n": n, "p": p} 58 | 59 | @classmethod 60 | def get_test_params(cls, parameter_set="default"): 61 | """Return testing parameter settings for the estimator.""" 62 | # array case examples 63 | params1 = {"n": [[10, 10], [20, 30], [40, 50]], "p": 0.5} 64 | params2 = { 65 | "n": 10, 66 | "p": 0.5, 67 | "index": pd.Index([1, 2, 5]), 68 | "columns": pd.Index(["a", "b"]), 69 | } 70 | # scalar case examples 71 | params3 = {"n": 15, "p": 0.7} 72 | 73 | return [params1, params2, params3] 74 | -------------------------------------------------------------------------------- /skpro/distributions/left_truncated.py: -------------------------------------------------------------------------------- 1 | """Left Truncated Discrete Distribution.""" 2 | from typing import Union 3 | 4 | from skpro.distributions.base import BaseDistribution 5 | from skpro.distributions.truncated import TruncatedDistribution 6 | 7 | 8 | class 
LeftTruncated(TruncatedDistribution): 9 | r"""A left truncated distribution _not_ including the lower bound. 10 | 11 | See :class:`TruncatedDistribution` for more details. 12 | 13 | Parameters 14 | ---------- 15 | distribution : BaseDistribution 16 | The distribution to truncate from the left, _not_ including the lower bound. 17 | 18 | lower : int 19 | The lower bound below which values are truncated (excluded from sampling). 20 | 21 | """ 22 | 23 | def __init__( 24 | self, 25 | distribution: BaseDistribution, 26 | lower: Union[float, int], 27 | index=None, 28 | columns=None, 29 | ): 30 | super().__init__( 31 | distribution, lower=lower, upper=None, index=index, columns=columns 32 | ) 33 | 34 | def _iloc(self, rowidx=None, colidx=None): 35 | distr = self.distribution.iloc[rowidx, colidx] 36 | 37 | if rowidx is not None: 38 | new_index = self.index[rowidx] 39 | else: 40 | new_index = self.index 41 | 42 | if colidx is not None: 43 | new_columns = self.columns[colidx] 44 | else: 45 | new_columns = self.columns 46 | 47 | cls = type(self) 48 | return cls( 49 | distribution=distr, 50 | lower=self.lower, 51 | index=new_index, 52 | columns=new_columns, 53 | ) 54 | 55 | @classmethod 56 | def get_test_params(cls, parameter_set="default"): # noqa: D102 57 | import pandas as pd 58 | 59 | from skpro.distributions import NegativeBinomial 60 | 61 | # scalar 62 | dist = NegativeBinomial(mu=1.0, alpha=1.0) 63 | params1 = { 64 | "distribution": dist, 65 | "lower": 0, 66 | } 67 | 68 | # array 69 | idx = pd.Index([1, 2]) 70 | cols = pd.Index(["a", "b"]) 71 | n_array = NegativeBinomial( 72 | mu=[[1, 2], [3, 4]], alpha=1.0, columns=cols, index=idx 73 | ) 74 | params2 = { 75 | "distribution": n_array, 76 | "lower": 0, 77 | "index": idx, 78 | "columns": cols, 79 | } 80 | 81 | return [params1, params2] 82 | -------------------------------------------------------------------------------- /skpro/metrics/survival/tests/test_c_harrell.py: 
-------------------------------------------------------------------------------- 1 | """Tests for Harrell's C-index.""" 2 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 3 | 4 | import pandas as pd 5 | import pytest 6 | 7 | 8 | @pytest.mark.parametrize("concordant", [True, False]) 9 | @pytest.mark.parametrize("pass_c", ["True", "False", "None"]) 10 | @pytest.mark.parametrize("normalization", ["overall", "index"]) 11 | def test_charrell_logic(concordant, pass_c, normalization): 12 | """Test the logic of Harrell's C-index metric. 13 | 14 | Parameters 15 | ---------- 16 | concordant : bool, optional, default=True 17 | If True, the test examples are fully concordant. 18 | If False, the test examples are fully discordant. 19 | pass_c : str, one of "True", "False", "None" 20 | If "True", the ``C_true`` argument is passed to the metric, with censoring data. 21 | If "None", the ``C_true`` argument is passed to the metric, with value None. 22 | If "False", the ``C_true`` argument is not passed to the metric. 23 | normalization : str, optional, default="overall" 24 | The normalization method for the metric. 
25 | """ 26 | from skpro.distributions import Normal 27 | from skpro.metrics.survival._c_harrell import ConcordanceHarrell 28 | 29 | # examples are constructed to be fully concordant or discordant, 30 | # depending on the value of `concordant` 31 | y_true = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 4, 3, 2]}) 32 | c_true = pd.DataFrame({"a": [1, 0, 1, 0], "b": [0, 1, 0, 1]}) 33 | y_pred_mean = pd.DataFrame({"a": [2, 3, 4, 5], "b": [6, 5, 4, 3]}) 34 | 35 | if not concordant: 36 | y_pred_mean = -y_pred_mean 37 | y_pred = Normal(y_pred_mean, sigma=1, columns=pd.Index(["a", "b"])) 38 | 39 | # evaluate the metric 40 | metric = ConcordanceHarrell(normalization=normalization, tie_score=int(concordant)) 41 | metric_args = {"y_true": y_true, "y_pred": y_pred} 42 | if pass_c == "True": 43 | metric_args["C_true"] = c_true 44 | elif pass_c == "None": 45 | metric_args["C_true"] = None 46 | 47 | res = metric(**metric_args) 48 | res_by_index = metric.evaluate_by_index(**metric_args) 49 | 50 | assert res_by_index.shape == y_true.shape 51 | 52 | # test assumptions 53 | # if concordant, the result should be 1 54 | # if discordant, the result should be 0 55 | assert res == concordant 56 | 57 | if normalization == "index": 58 | assert (res_by_index == concordant).all().all() 59 | -------------------------------------------------------------------------------- /skpro/distributions/erlang.py: -------------------------------------------------------------------------------- 1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 2 | """Erlang probability distribution.""" 3 | 4 | __author__ = ["RUPESH-KUMAR01"] 5 | 6 | import pandas as pd 7 | from scipy.stats import erlang 8 | 9 | from skpro.distributions.adapters.scipy import _ScipyAdapter 10 | 11 | 12 | class Erlang(_ScipyAdapter): 13 | r"""Erlang Distribution. 14 | 15 | Most methods wrap ``scipy.stats.erlang``. 
16 | 17 | The Erlang Distribution is parameterized by shape :math:`k` 18 | and rate :math:`\lambda`, such that the pdf is 19 | 20 | .. math:: f(x) = \frac{x^{k-1}\exp\left(-\lambda x\right) \lambda^{k}}{(k-1)!} 21 | 22 | Parameters 23 | ---------- 24 | rate : float or array of float (1D or 2D) 25 | Represents the rate parameter, which is also the inverse of the scale parameter. 26 | k : int or array of int (1D or 2D), optional, default = 1 27 | Represents the shape parameter. 28 | index : pd.Index, optional, default = RangeIndex 29 | columns : pd.Index, optional, default = RangeIndex 30 | 31 | Examples 32 | -------- 33 | >>> from skpro.distributions.erlang import Erlang 34 | 35 | >>> d = Erlang(rate=[[1, 1], [2, 3], [4, 5]], k=2) 36 | """ 37 | 38 | _tags = { 39 | "capabilities:approx": ["energy", "pdfnorm"], 40 | "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"], 41 | "distr:measuretype": "continuous", 42 | "distr:paramtype": "parametric", 43 | "broadcast_init": "on", 44 | } 45 | 46 | def __init__(self, rate, k=1, index=None, columns=None): 47 | self.rate = rate 48 | self.k = k 49 | 50 | super().__init__(index=index, columns=columns) 51 | 52 | def _get_scipy_object(self): 53 | return erlang 54 | 55 | def _get_scipy_param(self): 56 | rate = self._bc_params["rate"] 57 | k = self._bc_params["k"] 58 | 59 | return [], {"scale": 1 / rate, "a": k} 60 | 61 | @classmethod 62 | def get_test_params(cls, parameter_set="default"): 63 | """Return testing parameter settings for the estimator.""" 64 | # Array case examples 65 | params1 = { 66 | "rate": 2.0, 67 | "k": 3, 68 | "index": pd.Index([0, 1, 2]), 69 | "columns": pd.Index(["x", "y"]), 70 | } 71 | # Scalar case examples 72 | params2 = {"rate": 0.8, "k": 2} 73 | 74 | params3 = {"rate": 3.0, "k": 1} 75 | 76 | return [params1, params2, params3] 77 | -------------------------------------------------------------------------------- /skpro/regression/tests/test_cyclic_boosting.py: 
-------------------------------------------------------------------------------- 1 | """Tests for cyclic boosting regressor.""" 2 | 3 | import pandas as pd 4 | import pytest 5 | 6 | from skpro.regression.cyclic_boosting import CyclicBoosting 7 | from skpro.tests.test_switch import run_test_for_class 8 | 9 | 10 | @pytest.mark.skipif( 11 | not run_test_for_class(CyclicBoosting), 12 | reason="run test only if softdeps are present and incrementally (if requested)", 13 | ) 14 | def test_cyclic_boosting_simple_use(): 15 | """Test simple use of cyclic boosting regressor.""" 16 | from sklearn.datasets import load_diabetes 17 | from sklearn.model_selection import train_test_split 18 | 19 | X, y = load_diabetes(return_X_y=True, as_frame=True) 20 | y = pd.DataFrame(y) 21 | X = X.iloc[:200] 22 | y = y.iloc[:200] 23 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) 24 | 25 | reg_proba = CyclicBoosting() 26 | reg_proba.fit(X_train, y_train) 27 | y_pred = reg_proba.predict_proba(X_test) 28 | 29 | assert y_pred.shape == y_test.shape 30 | 31 | 32 | @pytest.mark.skipif( 33 | not run_test_for_class(CyclicBoosting), 34 | reason="run test only if softdeps are present and incrementally (if requested)", 35 | ) 36 | def test_cyclic_boosting_with_manual_parameters(): 37 | """Test use of cyclic boosting regressor with_manual_parameters.""" 38 | from cyclic_boosting import flags 39 | from sklearn.datasets import load_diabetes 40 | from sklearn.model_selection import train_test_split 41 | 42 | X, y = load_diabetes(return_X_y=True, as_frame=True) 43 | y = pd.DataFrame(y) 44 | X = X.iloc[:200] 45 | y = y.iloc[:200] 46 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) 47 | 48 | features = [ 49 | "age", 50 | "sex", 51 | "bmi", 52 | "bp", 53 | "s1", 54 | "s2", 55 | "s3", 56 | ("age", "sex"), 57 | ] 58 | 59 | fp = { 60 | "age": flags.IS_UNORDERED, 61 | "sex": flags.IS_UNORDERED, 62 | "bmi": flags.IS_CONTINUOUS, 63 | "bp": flags.IS_CONTINUOUS, 64 | 
"s1": flags.IS_CONTINUOUS, 65 | "s2": flags.IS_CONTINUOUS, 66 | "s3": flags.IS_CONTINUOUS, 67 | } 68 | 69 | reg_proba = CyclicBoosting( 70 | feature_groups=features, 71 | feature_properties=fp, 72 | maximal_iterations=5, 73 | alpha=0.25, 74 | mode="additive", 75 | lower=0.0, 76 | ) 77 | reg_proba.fit(X_train, y_train) 78 | y_pred = reg_proba.predict_proba(X_test) 79 | 80 | assert y_pred.shape == y_test.shape 81 | -------------------------------------------------------------------------------- /skpro/distributions/loggamma.py: -------------------------------------------------------------------------------- 1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 2 | """Log-Gamma probability distribution.""" 3 | 4 | __author__ = ["ali-john"] 5 | 6 | import pandas as pd 7 | from scipy.stats import loggamma, rv_continuous 8 | 9 | from skpro.distributions.adapters.scipy import _ScipyAdapter 10 | 11 | 12 | class LogGamma(_ScipyAdapter): 13 | r"""Log-Gamma Distribution. 14 | 15 | Most methods wrap ``scipy.stats.loggamma``. 16 | 17 | The Log-Gamma distribution is a continuous probability distribution 18 | whose logarithm is related to the gamma distribution. It is useful 19 | in extreme value theory and reliability analysis. 20 | 21 | The Log-Gamma distribution is parameterized by the shape parameter 22 | :math:`c`, such that the pdf is 23 | 24 | .. math:: f(x) = \frac{\exp(cx - \exp(x))}{\Gamma(c)} 25 | 26 | where :math:`\Gamma(c)` is the Gamma function. 27 | 28 | The shape parameter :math:`c` is represented by the parameter ``c``. 
29 | 30 | Parameters 31 | ---------- 32 | c : float or array of float (1D or 2D), must be positive 33 | shape parameter of the log-gamma distribution 34 | index : pd.Index, optional, default = RangeIndex 35 | columns : pd.Index, optional, default = RangeIndex 36 | 37 | Examples 38 | -------- 39 | >>> from skpro.distributions.loggamma import LogGamma 40 | 41 | >>> d = LogGamma(c=[[1, 2], [3, 4], [5, 6]]) 42 | """ 43 | 44 | _tags = { 45 | "capabilities:approx": ["energy", "pdfnorm"], 46 | "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"], 47 | "distr:measuretype": "continuous", 48 | "distr:paramtype": "parametric", 49 | "broadcast_init": "on", 50 | } 51 | 52 | def __init__(self, c, index=None, columns=None): 53 | self.c = c 54 | 55 | super().__init__(index=index, columns=columns) 56 | 57 | def _get_scipy_object(self) -> rv_continuous: 58 | return loggamma 59 | 60 | def _get_scipy_param(self): 61 | c = self._bc_params["c"] 62 | 63 | return [c], {} 64 | 65 | @classmethod 66 | def get_test_params(cls, parameter_set="default"): 67 | """Return testing parameter settings for the estimator.""" 68 | # array case examples 69 | params1 = {"c": [[1, 2], [3, 4]]} 70 | params2 = { 71 | "c": 2, 72 | "index": pd.Index([1, 2, 5]), 73 | "columns": pd.Index(["a", "b"]), 74 | } 75 | # scalar case examples 76 | params3 = {"c": 1.5} 77 | 78 | return [params1, params2, params3] 79 | -------------------------------------------------------------------------------- /skpro/metrics/tests/test_distr_metrics.py: -------------------------------------------------------------------------------- 1 | """Tests for probabilistic metrics for distribution predictions.""" 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | from skbase.testing import QuickTester 6 | 7 | from skpro.distributions import Normal 8 | from skpro.tests.test_all_estimators import BaseFixtureGenerator, PackageConfig 9 | 10 | TEST_DISTS = [Normal] 11 | 12 | 13 | class 
TestAllDistrMetrics(PackageConfig, BaseFixtureGenerator, QuickTester): 14 | """Generic tests for all probabilistic regression metrics in the package.""" 15 | 16 | # class variables which can be overridden by descendants 17 | # ------------------------------------------------------ 18 | 19 | # which object types are generated; None=all, or scitype string 20 | # passed to skpro.registry.all_objects as object_type 21 | object_type_filter = "metric_distr" 22 | 23 | @pytest.mark.parametrize("dist", TEST_DISTS) 24 | @pytest.mark.parametrize("pass_c", [True, False]) 25 | @pytest.mark.parametrize("multivariate", [True, False]) 26 | @pytest.mark.parametrize("multioutput", ["raw_values", "uniform_average"]) 27 | def test_distr_evaluate( 28 | self, object_instance, dist, pass_c, multivariate, multioutput 29 | ): 30 | """Test expected output of evaluate functions.""" 31 | metric = object_instance 32 | 33 | y_pred = dist.create_test_instance() 34 | y_true = y_pred.sample() 35 | 36 | m = metric.set_params(multioutput=multioutput) 37 | if "multivariate" in metric.get_params(): 38 | m = m.set_params(multivariate=multivariate) 39 | 40 | if not multivariate: 41 | expected_cols = y_true.columns 42 | else: 43 | expected_cols = ["score"] 44 | 45 | metric_args = {"y_true": y_true, "y_pred": y_pred} 46 | if pass_c: 47 | c_true = np.random.randint(0, 2, size=y_true.shape) 48 | c_true = pd.DataFrame(c_true, columns=y_true.columns, index=y_true.index) 49 | metric_args["C_true"] = c_true 50 | 51 | res = m.evaluate_by_index(**metric_args) 52 | assert isinstance(res, pd.DataFrame) 53 | assert (res.columns == expected_cols).all() 54 | assert res.shape == (y_true.shape[0], len(expected_cols)) 55 | 56 | res = m.evaluate(**metric_args) 57 | 58 | expect_df = not multivariate and multioutput == "raw_values" 59 | if expect_df: 60 | assert isinstance(res, pd.DataFrame) 61 | assert (res.columns == expected_cols).all() 62 | assert res.shape == (1, len(expected_cols)) 63 | else: 64 | assert 
isinstance(res, float) 65 | -------------------------------------------------------------------------------- /skpro/distributions/fisk.py: -------------------------------------------------------------------------------- 1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 2 | """Log-logistic aka Fisk probability distribution.""" 3 | 4 | __author__ = ["fkiraly", "malikrafsan"] 5 | 6 | import pandas as pd 7 | from scipy.stats import fisk, rv_continuous 8 | 9 | from skpro.distributions.adapters.scipy import _ScipyAdapter 10 | 11 | 12 | class Fisk(_ScipyAdapter): 13 | r"""Fisk distribution, aka log-logistic distribution. 14 | 15 | Most methods wrap ``scipy.stats.fisk``. 16 | 17 | The Fisk distribution is parametrized by a scale parameter :math:`\alpha` 18 | and a shape parameter :math:`\beta`, such that the cumulative distribution 19 | function (CDF) is given by: 20 | 21 | .. math:: F(x) = \left(1 + \left(\frac{x}{\alpha}\right)^{-\beta}\right)^{-1} 22 | 23 | Parameters 24 | ---------- 25 | alpha : float or array of float (1D or 2D), must be positive 26 | scale parameter of the distribution 27 | beta : float or array of float (1D or 2D), must be positive 28 | shape parameter of the distribution 29 | index : pd.Index, optional, default = RangeIndex 30 | columns : pd.Index, optional, default = RangeIndex 31 | 32 | Examples 33 | -------- 34 | >>> from skpro.distributions.fisk import Fisk 35 | 36 | >>> d = Fisk(beta=[[1, 1], [2, 3], [4, 5]], alpha=2) 37 | """ 38 | 39 | _tags = { 40 | "capabilities:approx": ["energy", "pdfnorm"], 41 | "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"], 42 | "distr:measuretype": "continuous", 43 | "distr:paramtype": "parametric", 44 | "broadcast_init": "on", 45 | } 46 | 47 | def __init__(self, alpha=1, beta=1, index=None, columns=None): 48 | self.alpha = alpha 49 | self.beta = beta 50 | 51 | super().__init__(index=index, columns=columns) 52 | 53 | def _get_scipy_object(self) -> rv_continuous: 54 | 
return fisk 55 | 56 | def _get_scipy_param(self): 57 | alpha = self._bc_params["alpha"] 58 | beta = self._bc_params["beta"] 59 | 60 | return [], {"c": beta, "scale": alpha} 61 | 62 | @classmethod 63 | def get_test_params(cls, parameter_set="default"): 64 | """Return testing parameter settings for the estimator.""" 65 | # array case examples 66 | params1 = {"alpha": [[1, 1], [2, 3], [4, 5]], "beta": 3} 67 | params2 = { 68 | "alpha": 2, 69 | "beta": 3, 70 | "index": pd.Index([1, 2, 5]), 71 | "columns": pd.Index(["a", "b"]), 72 | } 73 | # scalar case examples 74 | params3 = {"alpha": 1.5, "beta": 2.1} 75 | 76 | return [params1, params2, params3] 77 | -------------------------------------------------------------------------------- /skpro/distributions/halfnormal.py: -------------------------------------------------------------------------------- 1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 2 | """Half-Normal probability distribution.""" 3 | 4 | __author__ = ["SaiRevanth25"] 5 | 6 | import pandas as pd 7 | from scipy.stats import halfnorm, rv_continuous 8 | 9 | from skpro.distributions.adapters.scipy import _ScipyAdapter 10 | 11 | 12 | class HalfNormal(_ScipyAdapter): 13 | r"""Half-Normal distribution. 14 | 15 | Most methods wrap ``scipy.stats.halfnorm``. 16 | 17 | This distribution is univariate, without correlation between dimensions 18 | for the array-valued case. 19 | 20 | The distribution is `cut off` at :math:`\( x = 0 \)`. There is no mass assigned to 21 | negative values; they are entirely excluded from the distribution. 22 | 23 | The half-normal distribution is parametrized by the standard deviation 24 | :math:`\sigma`, such that the pdf is 25 | 26 | .. math:: f(x) = \frac{\sqrt{2}}{\sigma \sqrt{\pi}} 27 | \exp\left(-\frac{x^2}{2\sigma^2}\right), x>0 otherwise 0 28 | 29 | The standard deviation :math:`\sigma` is represented by the parameter ``sigma``. 
30 | 31 | Parameters 32 | ---------- 33 | sigma : float or array of float (1D or 2D), must be positive 34 | standard deviation of the half-normal distribution 35 | index : pd.Index, optional, default = RangeIndex 36 | columns : pd.Index, optional, default = RangeIndex 37 | 38 | Examples 39 | -------- 40 | >>> from skpro.distributions.halfnormal import HalfNormal 41 | 42 | >>> hn = HalfNormal(sigma=1) 43 | """ 44 | 45 | _tags = { 46 | "capabilities:approx": ["pdfnorm"], 47 | "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"], 48 | "distr:measuretype": "continuous", 49 | "distr:paramtype": "parametric", 50 | "broadcast_init": "on", 51 | } 52 | 53 | def __init__(self, sigma, index=None, columns=None): 54 | self.sigma = sigma 55 | 56 | super().__init__(index=index, columns=columns) 57 | 58 | def _get_scipy_object(self) -> rv_continuous: 59 | return halfnorm 60 | 61 | def _get_scipy_param(self): 62 | sigma = self._bc_params["sigma"] 63 | return [sigma], {} 64 | 65 | @classmethod 66 | def get_test_params(cls, parameter_set="default"): 67 | """Return testing parameter settings for the estimator.""" 68 | # array case examples 69 | params1 = {"sigma": [[1, 2], [3, 4]]} 70 | params2 = { 71 | "sigma": 1, 72 | "index": pd.Index([1, 2, 5]), 73 | "columns": pd.Index(["a", "b"]), 74 | } 75 | # scalar case examples 76 | params3 = {"sigma": 2} 77 | return [params1, params2, params3] 78 | -------------------------------------------------------------------------------- /skpro/distributions/inversegamma.py: -------------------------------------------------------------------------------- 1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 2 | """Inverse Gamma probability distribution.""" 3 | 4 | __author__ = ["meraldoantonio"] 5 | 6 | import pandas as pd 7 | from scipy.stats import invgamma, rv_continuous 8 | 9 | from skpro.distributions.adapters.scipy import _ScipyAdapter 10 | 11 | 12 | class InverseGamma(_ScipyAdapter): 13 | r"""Inverse Gamma 
Distribution. 14 | 15 | Most methods wrap ``scipy.stats.invgamma``. 16 | 17 | The Inverse Gamma Distribution is parameterized by shape :math:`\alpha` and 18 | scale :math:`\beta`, such that the pdf is 19 | 20 | .. math:: f(x) = \frac{\beta^{\alpha} x^{-\alpha-1} \exp\left(-\frac{\beta}{x}\right)}{\Gamma(\alpha)} 21 | 22 | where :math:`\Gamma(\alpha)` is the Gamma function. 23 | For all positive integers, :math:`\Gamma(\alpha) = (\alpha-1)!`. 24 | 25 | Parameters 26 | ---------- 27 | alpha : float or array of float (1D or 2D) 28 | The shape parameter. 29 | beta : float or array of float (1D or 2D) 30 | The scale parameter. 31 | index : pd.Index, optional, default = RangeIndex 32 | columns : pd.Index, optional, default = RangeIndex 33 | 34 | Examples 35 | -------- 36 | >>> from skpro.distributions.inversegamma import InverseGamma 37 | 38 | >>> d = InverseGamma(beta=[[1, 1], [2, 3], [4, 5]], alpha=2) 39 | """ # noqa: E501 40 | 41 | _tags = { 42 | "capabilities:approx": ["energy", "pdfnorm"], 43 | "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"], 44 | "distr:measuretype": "continuous", 45 | "distr:paramtype": "parametric", 46 | "broadcast_init": "on", 47 | } 48 | 49 | def __init__(self, alpha, beta, index=None, columns=None): 50 | self.alpha = alpha 51 | self.beta = beta 52 | 53 | super().__init__(index=index, columns=columns) 54 | 55 | def _get_scipy_object(self) -> rv_continuous: 56 | return invgamma 57 | 58 | def _get_scipy_param(self): 59 | alpha = self._bc_params["alpha"] 60 | beta = self._bc_params["beta"] 61 | scale = beta 62 | 63 | return [], {"a": alpha, "scale": scale} 64 | 65 | @classmethod 66 | def get_test_params(cls, parameter_set="default"): 67 | """Return testing parameter settings for the estimator.""" 68 | # array case examples 69 | params1 = {"alpha": [6, 2.5], "beta": [[1, 1], [2, 3], [4, 5]]} 70 | params2 = { 71 | "alpha": 2, 72 | "beta": 3, 73 | "index": pd.Index([1, 2, 5]), 74 | "columns": pd.Index(["a", "b"]), 75 | } 76 | # scalar 
case examples 77 | params3 = {"alpha": 1.5, "beta": 2.1} 78 | 79 | return [params1, params2, params3] 80 | -------------------------------------------------------------------------------- /skpro/distributions/alpha.py: -------------------------------------------------------------------------------- 1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 2 | """Alpha probability distribution.""" 3 | 4 | __author__ = ["SaiReavanth25"] 5 | 6 | import pandas as pd 7 | from scipy.stats import alpha, rv_continuous 8 | 9 | from skpro.distributions.adapters.scipy import _ScipyAdapter 10 | 11 | 12 | class Alpha(_ScipyAdapter): 13 | r"""Alpha distribution. 14 | 15 | Most methods wrap ``scipy.stats.alpha``. 16 | 17 | The alpha distribution is characterized by its shape parameter :math:`a`, 18 | which determines its skewness and tail behavior. 19 | It is often used for modeling data with heavy right tails, 20 | unlike the Gaussian distribution (which is symmetric and bell-shaped). 21 | 22 | The probability density function (PDF) of the Alpha distribution is given by: 23 | .. math:: 24 | 25 | f(x) = \frac{1}{x^2 \Phi(a) \sqrt{2\pi}} 26 | \exp\left(-\frac{1}{2}\left(a - \frac{1}{x}\right)^2\right) 27 | 28 | where: 29 | - :math:`a` is the shape parameter. 30 | - :math:`\Phi` is the cumulative distribution function (CDF) of the 31 | standard normal distribution. 32 | 33 | Parameters 34 | ---------- 35 | a : float or array of float (1D or 2D), must be positive 36 | Shape parameter controlling skewness and tail behavior. 37 | Higher values result in heavier tails and greater skewness towards the right.
38 | index : pd.Index, optional, default = RangeIndex 39 | columns : pd.Index, optional, default = RangeIndex 40 | 41 | Examples 42 | -------- 43 | >>> from skpro.distributions import Alpha 44 | 45 | >>> distr = Alpha(a=[[1, 2], [3, 4]]) 46 | """ 47 | 48 | _tags = { 49 | "capabilities:approx": ["pdfnorm"], 50 | "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"], 51 | "distr:measuretype": "continuous", 52 | "distr:paramtype": "parametric", 53 | "broadcast_init": "on", 54 | } 55 | 56 | def __init__(self, a, index=None, columns=None): 57 | self.a = a 58 | 59 | super().__init__(index=index, columns=columns) 60 | 61 | def _get_scipy_object(self) -> rv_continuous: 62 | return alpha 63 | 64 | def _get_scipy_param(self): 65 | a = self._bc_params["a"] 66 | 67 | return [a], {} 68 | 69 | @classmethod 70 | def get_test_params(cls, parameter_set="default"): 71 | """Return testing parameter settings for the estimator.""" 72 | params1 = {"a": [[2, 3], [4, 5]]} 73 | params2 = { 74 | "a": 3, 75 | "index": pd.Index([1, 2, 3]), 76 | "columns": pd.Index(["a", "b"]), 77 | } 78 | params3 = {"a": 2.5} 79 | 80 | return [params1, params2, params3] 81 | -------------------------------------------------------------------------------- /skpro/distributions/loglaplace.py: -------------------------------------------------------------------------------- 1 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 2 | """Log-Laplace probability distribution.""" 3 | 4 | __author__ = ["SaiRevanth25"] 5 | 6 | import pandas as pd 7 | from scipy.stats import loglaplace, rv_continuous 8 | 9 | from skpro.distributions.adapters.scipy import _ScipyAdapter 10 | 11 | 12 | class LogLaplace(_ScipyAdapter): 13 | r"""Log-Laplace distribution. 14 | 15 | Most methods wrap ``scipy.stats.loglaplace``. 16 | 17 | This distribution is univariate, without correlation between dimensions 18 | for the array-valued case. 
19 | 20 | The log-Laplace distribution is the continuous probability distribution of a 21 | random variable whose logarithm follows a Laplace distribution, commonly used in finance and 22 | hydrology due to its heavy tails and asymmetry. 23 | 24 | The log-Laplace distribution is parametrized by the parameter 25 | :math:`c`, such that the pdf is 26 | 27 | .. math:: f(x) = \frac{c}{2} x^{c-1}, \quad 0 < x < 1 28 | 29 | and 30 | 31 | .. math:: f(x) = \frac{c}{2} x^{-c-1}, \quad x \geq 1 32 | 33 | The parameter :math:`c` (the shape parameter ``c`` of ``scipy.stats.loglaplace``) is represented by the parameter ``scale``. 34 | 35 | Parameters 36 | ---------- 37 | scale : float or array of float (1D or 2D), must be positive 38 | parameter :math:`c` of the log-Laplace distribution 39 | index : pd.Index, optional, default = RangeIndex 40 | columns : pd.Index, optional, default = RangeIndex 41 | 42 | Examples 43 | -------- 44 | >>> from skpro.distributions.loglaplace import LogLaplace 45 | 46 | >>> ll = LogLaplace(scale=1) 47 | """ 48 | 49 | _tags = { 50 | "capabilities:approx": ["pdfnorm"], 51 | "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"], 52 | "distr:measuretype": "continuous", 53 | "distr:paramtype": "parametric", 54 | "broadcast_init": "on", 55 | } 56 | 57 | def __init__(self, scale, index=None, columns=None): 58 | self.scale = scale 59 | 60 | super().__init__(index=index, columns=columns) 61 | 62 | def _get_scipy_object(self) -> rv_continuous: 63 | return loglaplace 64 | 65 | def _get_scipy_param(self): 66 | scale = self._bc_params["scale"] 67 | return [scale], {} 68 | 69 | @classmethod 70 | def get_test_params(cls, parameter_set="default"): 71 | """Return testing parameter settings for the estimator.""" 72 | # array case examples 73 | params1 = {"scale": [[1, 2], [3, 4]]} 74 | params2 = { 75 | "scale": 1, 76 | "index": pd.Index([1, 2, 5]), 77 | "columns": pd.Index(["a", "b"]), 78 | } 79 | # scalar case examples 80 | params3 = {"scale": 2} 81 | return [params1, params2, params3] 82 | --------------------------------------------------------------------------------
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
"""Half-Cauchy probability distribution."""

__author__ = ["SaiRevanth25"]

import pandas as pd
from scipy.stats import halfcauchy, rv_continuous

from skpro.distributions.adapters.scipy import _ScipyAdapter


class HalfCauchy(_ScipyAdapter):
    r"""Half-Cauchy distribution.

    Most methods wrap ``scipy.stats.halfcauchy``.

    This distribution is univariate, without correlation between dimensions
    for the array-valued case.

    The half-Cauchy distribution is a continuous probability distribution that
    is the positive half of the Cauchy distribution. It is commonly used in
    Bayesian statistics, especially as a prior distribution for scale parameters
    due to its heavy tails and non-negativity.

    The half-Cauchy distribution is parametrized by the scale parameter
    :math:`\beta`, such that the pdf is

    .. math::

        f(x) = \frac{2}{\pi \beta \left(1 + \left(\frac{x}{\beta}\right)^2\right)},
        \quad x > 0,

    and :math:`f(x) = 0` otherwise.

    The scale parameter :math:`\beta` is represented by the parameter ``beta``.

    Parameters
    ----------
    beta : float or array of float (1D or 2D), must be positive
        scale parameter of the half-Cauchy distribution
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.halfcauchy import HalfCauchy

    >>> hc = HalfCauchy(beta=1)
    """

    _tags = {
        "capabilities:approx": ["pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, beta, index=None, columns=None):
        self.beta = beta

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the scipy distribution object wrapped by this class."""
        return halfcauchy

    def _get_scipy_param(self):
        """Return positional and keyword arguments for ``scipy.stats.halfcauchy``.

        Returns
        -------
        args : list
            positional (shape) arguments, empty for this distribution
        kwds : dict
            keyword arguments, here ``{"scale": beta}``
        """
        beta = self._bc_params["beta"]
        # halfcauchy has no shape parameters, so a positional argument would be
        # read by scipy as ``loc`` and shift the distribution instead of
        # scaling it; beta must therefore go in as the ``scale`` keyword
        return [], {"scale": beta}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # array case examples
        params1 = {"beta": [[1, 2], [3, 4]]}
        params2 = {
            "beta": 1,
            "index": pd.Index([1, 2, 5]),
            "columns": pd.Index(["a", "b"]),
        }
        # scalar case examples
        params3 = {"beta": 2}
        return [params1, params2, params3]
for survival regression models. 17 | 18 | Contains no additional logic, only docstring overrides. 19 | """ 20 | 21 | _tags = {"capability:survival": True} 22 | 23 | def fit(self, X, y, C=None): 24 | """Fit regressor to training data. 25 | 26 | Writes to self: 27 | Sets fitted model attributes ending in "_". 28 | 29 | Changes state to "fitted" = sets is_fitted flag to True 30 | 31 | Parameters 32 | ---------- 33 | X : pandas DataFrame 34 | feature instances to fit regressor to 35 | y : pd.DataFrame, must be same length as X 36 | labels to fit regressor to 37 | C : pd.DataFrame, optional (default=None) 38 | censoring information for survival analysis, 39 | 40 | * should have same column name as y, same length as X and y 41 | * should have entries 0 and 1 (float or int), 42 | 0 = uncensored, 1 = (right) censored 43 | 44 | if None, all observations are assumed to be uncensored. 45 | 46 | Returns 47 | ------- 48 | self : reference to self 49 | """ 50 | super().fit(X=X, y=y, C=C) 51 | return self 52 | 53 | def update(self, X, y, C=None): 54 | """Update regressor with a new batch of training data. 55 | 56 | Only estimators with the ``capability:update`` tag (value ``True``) 57 | provide this method, otherwise the method ignores the call and 58 | discards the data passed. 59 | 60 | State required: 61 | Requires state to be "fitted". 62 | 63 | Writes to self: 64 | Updates fitted model attributes ending in "_". 
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
"""Half-Logistic probability distribution."""

__author__ = ["SaiRevanth25"]

import pandas as pd
from scipy.stats import halflogistic, rv_continuous

from skpro.distributions.adapters.scipy import _ScipyAdapter


class HalfLogistic(_ScipyAdapter):
    r"""Half-Logistic distribution.

    Most methods wrap ``scipy.stats.halflogistic``.

    This distribution is univariate, without correlation between dimensions
    for the array-valued case.

    The half-logistic distribution is a continuous probability distribution derived
    from the logistic distribution by taking only the positive half. It is particularly
    useful in reliability analysis, lifetime modeling, and other applications where
    non-negative values are required.

    The half-logistic distribution is parametrized by the scale parameter
    :math:`\beta`, such that the pdf is

    .. math::

        f(x) = \frac{2 \exp\left(-\frac{x}{\beta}\right)}
        {\beta \left(1 + \exp\left(-\frac{x}{\beta}\right)\right)^2},
        \quad x > 0,

    and :math:`f(x) = 0` otherwise.

    The scale parameter :math:`\beta` is represented by the parameter ``beta``.

    Parameters
    ----------
    beta : float or array of float (1D or 2D), must be positive
        scale parameter of the half-logistic distribution
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.halflogistic import HalfLogistic

    >>> hl = HalfLogistic(beta=1)
    """

    _tags = {
        "capabilities:approx": ["pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, beta, index=None, columns=None):
        self.beta = beta

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the scipy distribution object wrapped by this class."""
        return halflogistic

    def _get_scipy_param(self):
        """Return positional and keyword arguments for ``scipy.stats.halflogistic``.

        Returns
        -------
        args : list
            positional (shape) arguments, empty for this distribution
        kwds : dict
            keyword arguments, here ``{"scale": beta}``
        """
        beta = self._bc_params["beta"]
        # halflogistic has no shape parameters, so a positional argument would
        # be read by scipy as ``loc`` and shift the distribution instead of
        # scaling it; beta must therefore go in as the ``scale`` keyword
        return [], {"scale": beta}

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator."""
        # array case examples
        params1 = {"beta": [[1, 2], [3, 4]]}
        params2 = {
            "beta": 1,
            "index": pd.Index([1, 2, 5]),
            "columns": pd.Index(["a", "b"]),
        }
        # scalar case examples
        params3 = {"beta": 2}
        return [params1, params2, params3]
import importlib
import inspect
import pkgutil
from functools import lru_cache

# submodules whose last name component starts with one of these are skipped
EXCLUDE_MODULES_STARTING_WITH = ("all", "test", "contrib")


def _all_functions(module_name):
    """Get all functions from the submodules of a package.

    Excludes modules whose name starts with any prefix in
    ``EXCLUDE_MODULES_STARTING_WITH``.

    Parameters
    ----------
    module_name : str
        Name of the package.

    Returns
    -------
    functions_list : list
        List of tuples (function_name: str, function_object: function).
    """
    # copy to avoid modifying the cache
    return _all_cond(module_name, inspect.isfunction).copy()


def _all_classes(module_name):
    """Get all classes from the submodules of a package.

    Excludes modules whose name starts with any prefix in
    ``EXCLUDE_MODULES_STARTING_WITH``.

    Parameters
    ----------
    module_name : str
        Name of the package.

    Returns
    -------
    classes_list : list
        List of tuples (class_name: str, class_ref: class).
    """
    # copy to avoid modifying the cache
    return _all_cond(module_name, inspect.isclass).copy()


@lru_cache
def _all_cond(module_name, cond):
    """Get all objects from the submodules of a package satisfying a condition.

    The condition should be a hashable callable,
    of signature ``condition(obj) -> bool``.

    Excludes modules whose name starts with any prefix in
    ``EXCLUDE_MODULES_STARTING_WITH``. Only objects defined in the module
    they are found in are returned, re-exported objects are skipped.

    The result is cached, and the cached list itself is returned;
    callers that mutate the result should copy it first.

    Parameters
    ----------
    module_name : str
        Name of the package.
    cond : callable
        Condition to satisfy.
        Signature: ``condition(obj) -> bool``,
        passed as predicate to ``inspect.getmembers``.

    Returns
    -------
    obj_list : list
        List of tuples (object_name, object).
    """
    package = importlib.import_module(module_name)

    obj_list = []

    # walk every entry of __path__, not only the first one, so that packages
    # spread over several directories are scanned completely
    search_path = list(package.__path__)
    for _, modname, _ in pkgutil.walk_packages(
        path=search_path, prefix=package.__name__ + "."
    ):
        # only the last component of the dotted name decides exclusion
        if modname.rpartition(".")[2].startswith(EXCLUDE_MODULES_STARTING_WITH):
            continue

        module = importlib.import_module(modname)

        for name, obj in inspect.getmembers(module, cond):
            # skip objects imported from elsewhere; objects without a
            # __module__ attribute cannot be attributed and are skipped too
            if getattr(obj, "__module__", None) != module.__name__:
                continue
            obj_list.append((name, obj))

    return obj_list
22 | 23 | The pdf in terms of :math:`\mu` = ``mu`` and :math:`\sigma` = ``scale`` is: 24 | 25 | .. math:: f(x; \mu, \sigma) = \sqrt{\frac{\sigma}{2 \pi x^3}} 26 | \exp\left(-\frac{(x - \mu \sigma)^2}{2 \mu^2 \sigma x}\right) 27 | 28 | Parameters 29 | ---------- 30 | mu : float or array of float (1D or 2D), must be positive 31 | shape parameter (dimensionless) 32 | scale : float or array of float (1D or 2D), must be positive 33 | scale parameter (multiplies the distribution) 34 | index : pd.Index, optional, default = RangeIndex 35 | columns : pd.Index, optional, default = RangeIndex 36 | 37 | Examples 38 | -------- 39 | >>> from skpro.distributions.inversegaussian import InverseGaussian 40 | 41 | >>> d = InverseGaussian(mu=1.0, scale=1.0) 42 | """ 43 | 44 | _tags = { 45 | "capabilities:approx": ["energy", "pdfnorm"], 46 | "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"], 47 | "distr:measuretype": "continuous", 48 | "distr:paramtype": "parametric", 49 | "broadcast_init": "on", 50 | } 51 | 52 | def __init__(self, mu, scale, index=None, columns=None): 53 | self.mu = mu 54 | self.scale = scale 55 | 56 | super().__init__(index=index, columns=columns) 57 | 58 | def _get_scipy_object(self) -> rv_continuous: 59 | return invgauss 60 | 61 | def _get_scipy_param(self): 62 | # Pass parameters directly to scipy.stats.invgauss. 63 | # SciPy's invgauss accepts a shape parameter `mu` and a keyword `scale`. 
64 | mu = self._bc_params["mu"] 65 | scale = self._bc_params["scale"] 66 | 67 | return [mu], {"scale": scale} 68 | 69 | @classmethod 70 | def get_test_params(cls, parameter_set="default"): 71 | """Return testing parameter settings for the estimator.""" 72 | # array case examples 73 | params1 = {"mu": [2, 3.5], "scale": [[1, 1], [2, 3], [4, 5]]} 74 | params2 = { 75 | "mu": 2.5, 76 | "scale": 1.5, 77 | "index": pd.Index([1, 2, 5]), 78 | "columns": pd.Index(["a", "b"]), 79 | } 80 | # scalar case examples 81 | params3 = {"mu": 3.0, "scale": 2.0} 82 | 83 | return [params1, params2, params3] 84 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at f.kiraly@ucl.ac.uk. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /skpro/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | """Probability distribution objects.""" 2 | 3 | # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) 4 | # adapted from sktime 5 | 6 | __all__ = [ 7 | "Alpha", 8 | "Beta", 9 | "Binomial", 10 | "ChiSquared", 11 | "Delta", 12 | "Empirical", 13 | "Erlang", 14 | "Exponential", 15 | "Fisk", 16 | "Gamma", 17 | "LogGamma", 18 | "Geometric", 19 | "HalfCauchy", 20 | "HalfLogistic", 21 | "HalfNormal", 22 | "Hurdle", 23 | "IID", 24 | "InverseGamma", 25 | "InverseGaussian", 26 | "Histogram", 27 | "Laplace", 28 | "LeftTruncated", 29 | "Logistic", 30 | "LogLaplace", 31 | "LogNormal", 32 | "MeanScale", 33 | "Mixture", 34 | "NegativeBinomial", 35 | "Normal", 36 | "Pareto", 37 | "Poisson", 38 | "QPD_Empirical", 39 | "QPD_S", 40 | "QPD_B", 41 | "QPD_U", 42 | "QPD_Johnson", 43 | "SkewNormal", 
44 | "TDistribution", 45 | "TransformedDistribution", 46 | "TruncatedDistribution", 47 | "TruncatedNormal", 48 | "Uniform", 49 | "Weibull", 50 | ] 51 | 52 | from skpro.distributions.alpha import Alpha 53 | from skpro.distributions.beta import Beta 54 | from skpro.distributions.binomial import Binomial 55 | from skpro.distributions.chi_squared import ChiSquared 56 | from skpro.distributions.compose import IID 57 | from skpro.distributions.delta import Delta 58 | from skpro.distributions.empirical import Empirical 59 | from skpro.distributions.erlang import Erlang 60 | from skpro.distributions.exponential import Exponential 61 | from skpro.distributions.fisk import Fisk 62 | from skpro.distributions.gamma import Gamma 63 | from skpro.distributions.geometric import Geometric 64 | from skpro.distributions.halfcauchy import HalfCauchy 65 | from skpro.distributions.halflogistic import HalfLogistic 66 | from skpro.distributions.halfnormal import HalfNormal 67 | from skpro.distributions.histogram import Histogram 68 | from skpro.distributions.hurdle import Hurdle 69 | from skpro.distributions.inversegamma import InverseGamma 70 | from skpro.distributions.inversegaussian import InverseGaussian 71 | from skpro.distributions.laplace import Laplace 72 | from skpro.distributions.left_truncated import LeftTruncated 73 | from skpro.distributions.loggamma import LogGamma 74 | from skpro.distributions.logistic import Logistic 75 | from skpro.distributions.loglaplace import LogLaplace 76 | from skpro.distributions.lognormal import LogNormal 77 | from skpro.distributions.meanscale import MeanScale 78 | from skpro.distributions.mixture import Mixture 79 | from skpro.distributions.negative_binomial import NegativeBinomial 80 | from skpro.distributions.normal import Normal 81 | from skpro.distributions.pareto import Pareto 82 | from skpro.distributions.poisson import Poisson 83 | from skpro.distributions.qpd import QPD_B, QPD_S, QPD_U, QPD_Johnson 84 | from 
skpro.distributions.qpd_empirical import QPD_Empirical 85 | from skpro.distributions.skew_normal import SkewNormal 86 | from skpro.distributions.t import TDistribution 87 | from skpro.distributions.trafo import TransformedDistribution 88 | from skpro.distributions.truncated import TruncatedDistribution 89 | from skpro.distributions.truncated_normal import TruncatedNormal 90 | from skpro.distributions.uniform import Uniform 91 | from skpro.distributions.weibull import Weibull 92 | -------------------------------------------------------------------------------- /skpro/datatypes/_convert_utils/_convert.py: -------------------------------------------------------------------------------- 1 | """Conversion utilities for mtypes.""" 2 | 3 | __author__ = ["fkiraly"] 4 | 5 | 6 | def _concat(fun1, fun2): 7 | """Concatenation of two converter functions, using the same store. 8 | 9 | Parameters 10 | ---------- 11 | fun1, fun2 : functions in converter signature, see datatypes._convert 12 | 13 | Returns 14 | ------- 15 | function in converter signature, see datatypes._convert 16 | concatenation fun2 o fun1, using the same store 17 | """ 18 | 19 | def concat_fun(obj, store=None): 20 | obj1 = fun1(obj, store=store) 21 | obj2 = fun2(obj1, store=store) 22 | return obj2 23 | 24 | return concat_fun 25 | 26 | 27 | def _extend_conversions(mtype, anchor_mtype, convert_dict, mtype_universe=None): 28 | """Obtain all conversions from and to mtype via conversion to anchor_mtype. 29 | 30 | Mutates convert_dict by adding all conversions from and to mtype. 
31 | 32 | Assumes: 33 | convert_dict contains 34 | * conversion from `mtype` to `anchor_mtype` 35 | * conversion from `anchor_mtype` to `mtype` 36 | * conversions from `anchor_mtype` to all mtypes in `mtype_universe` 37 | * conversions from all mtypes in `mtype_universe` to `anchor_mtype` 38 | 39 | Guarantees: 40 | convert_dict contains 41 | * conversions from `mtype` to all mtypes in mtype_universe 42 | * conversions from all mtypes in mtype_universe to `mtype` 43 | 44 | conversions from `mtype` to `tp` not in convert_dict at start are filled in as 45 | _concat(`mtype` to `anchor_mtype`, `anchor_mtype` to `tp`) 46 | conversions from `tp` to `mtype` not in convert_dict at start are filled in as 47 | _concat(`tp` to `anchor_mtype`, `anchor_mtype` to `mtype`) 48 | 49 | Parameters 50 | ---------- 51 | mtype : mtype string in convert_dict 52 | anchor_mtype : mtype string in convert_dict 53 | convert_dict : conversion dictionary with entries of converter signature 54 | see docstring of datatypes._convert 55 | mtype_universe : iterable of mtype strings in convert_dict, coercible to list or set 56 | 57 | Returns 58 | ------- 59 | reference to convert_dict 60 | CAVEAT: convert_dict passed to this function gets mutated, this is a reference 61 | """ 62 | keys = convert_dict.keys() 63 | scitype = list(keys)[0][2] 64 | 65 | if mtype_universe is None: 66 | mtype_universe = {x[1] for x in list(keys)} 67 | mtype_universe = mtype_universe.union([x[0] for x in list(keys)]) 68 | 69 | for tp in set(mtype_universe).difference([mtype, anchor_mtype]): 70 | if (anchor_mtype, tp, scitype) in convert_dict.keys(): 71 | if (mtype, tp, scitype) not in convert_dict.keys(): 72 | convert_dict[(mtype, tp, scitype)] = _concat( 73 | convert_dict[(mtype, anchor_mtype, scitype)], 74 | convert_dict[(anchor_mtype, tp, scitype)], 75 | ) 76 | if (tp, anchor_mtype, scitype) in convert_dict.keys(): 77 | if (tp, mtype, scitype) not in convert_dict.keys(): 78 | convert_dict[(tp, mtype, scitype)] = _concat( 79 | convert_dict[(tp, anchor_mtype, scitype)], 80 | convert_dict[(anchor_mtype, mtype, scitype)], 81 | ) 82 | 83 | return convert_dict
#!/usr/bin/env python3 -u
# License: BSD 3 clause
"""Utility methods to print system info for debugging.

adapted from
:func: `sklearn.show_versions`
"""

__author__ = ["mloning", "fkiraly"]
__all__ = ["show_versions"]

import importlib
import platform
import sys


def _get_sys_info():
    """Return system and Python version information.

    Returns
    -------
    sys_info : dict
        system and Python version information, with keys
        ``"python"``, ``"executable"`` and ``"machine"``
    """
    # sys.version may span several lines, flatten it for single-line printing
    python = sys.version.replace("\n", " ")

    return {
        "python": python,
        "executable": sys.executable,
        "machine": platform.platform(),
    }


# dependencies to print versions of, by default
# "skpro" is listed so that the package's own version appears in the report
DEFAULT_DEPS_TO_SHOW = [
    "pip",
    "skpro",
    "sktime",
    "sklearn",
    "skbase",
    "numpy",
    "scipy",
    "pandas",
    "matplotlib",
    "joblib",
    "numba",
    "statsmodels",
    "pmdarima",
    "statsforecast",
    "tsfresh",
    "tslearn",
    "torch",
    "tensorflow",
    "tensorflow_probability",
]


def _get_deps_info(deps=None):
    """Overview of the installed version of main dependencies.

    Parameters
    ----------
    deps : optional, list of strings with import names
        if None, behaves as deps = ["sktime"]

    Returns
    -------
    deps_info: dict
        version information on libraries in `deps`
        keys are import names, values are the ``__version__`` attribute
        of the import as present in the current python environment,
        or None if the import fails or has no ``__version__`` attribute
    """
    # NOTE(review): this default looks inherited from sktime; it is kept
    # unchanged here because callers may rely on it - confirm whether
    # "skpro" is intended
    if deps is None:
        deps = ["sktime"]

    deps_info = {}

    for modname in deps:
        try:
            # reuse an already imported module rather than importing again
            if modname in sys.modules:
                mod = sys.modules[modname]
            else:
                mod = importlib.import_module(modname)
        except ImportError:
            # dependency not installed: report it with version None
            deps_info[modname] = None
        else:
            deps_info[modname] = getattr(mod, "__version__", None)

    return deps_info


def show_versions():
    """Print python version, OS version, and selected dependency versions.

    Pretty prints:

    * python version of environment
    * python executable location
    * OS version
    * list of import name and version number for selected python dependencies

    Developer note:
    Python version/executable and OS version are from `_get_sys_info`
    Package versions are retrieved by `_get_deps_info`
    Selected dependencies are as in the DEFAULT_DEPS_TO_SHOW variable
    """
    sys_info = _get_sys_info()
    deps_info = _get_deps_info(deps=DEFAULT_DEPS_TO_SHOW)

    print("\nSystem:")  # noqa: T001, T201
    for k, stat in sys_info.items():
        print(f"{k:>10}: {stat}")  # noqa: T001, T201

    print("\nPython dependencies:")  # noqa: T001, T201
    for k, stat in deps_info.items():
        print(f"{k:>13}: {stat}")  # noqa: T001, T201
name: Build wheels and publish to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | check_tag: 9 | name: Check tag 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v6 14 | 15 | - uses: actions/setup-python@v6 16 | with: 17 | python-version: '3.11' 18 | 19 | - shell: bash 20 | run: | 21 | TAG="${{ github.event.release.tag_name }}" 22 | GH_TAG_NAME="${TAG#v}" 23 | PY_VERSION=$(python - <<'PY' 24 | import pathlib, tomllib 25 | data = tomllib.loads(pathlib.Path("pyproject.toml").read_text(encoding="utf-8")) 26 | print(data.get("project").get("version")) 27 | PY 28 | ) 29 | if [ "${GH_TAG_NAME}" != "${PY_VERSION}" ]; then 30 | echo "::error::Tag (${GH_TAG_NAME}) does not match pyproject.toml version (${PY_VERSION})." 31 | exit 2 32 | fi 33 | 34 | build_wheels: 35 | name: Build wheels 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - uses: actions/checkout@v6 40 | 41 | - uses: actions/setup-python@v6 42 | with: 43 | python-version: '3.11' 44 | 45 | - name: Build wheel 46 | run: | 47 | python -m pip install build 48 | python -m build --wheel --sdist --outdir wheelhouse 49 | 50 | - name: Store wheels 51 | uses: actions/upload-artifact@v6 52 | with: 53 | name: wheels 54 | path: wheelhouse/* 55 | 56 | test_wheels: 57 | needs: build_wheels 58 | name: Test wheels on ${{ matrix.os }} with ${{ matrix.python-version }} 59 | runs-on: ${{ matrix.os }} 60 | strategy: 61 | fail-fast: false # to not fail all combinations if just one fail 62 | matrix: 63 | os: [windows-latest, ubuntu-latest, macOS-latest] 64 | python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] 65 | 66 | steps: 67 | - uses: actions/checkout@v6 68 | - uses: actions/setup-python@v6 69 | with: 70 | python-version: ${{ matrix.python-version }} 71 | 72 | - uses: actions/download-artifact@v7 73 | with: 74 | name: wheels 75 | path: wheelhouse 76 | 77 | # Set wheel filename differently for Unix vs Windows 78 | - name: Get wheel filename (Unix) 79 | if: runner.os != 
'Windows' 80 | run: echo "WHEELNAME=$(ls ./wheelhouse/skpro-*none-any.whl)" >> $GITHUB_ENV 81 | 82 | - name: Get wheel filename (Windows) 83 | if: runner.os == 'Windows' 84 | run: echo "WHEELNAME=$(ls ./wheelhouse/skpro-*none-any.whl)" >> $env:GITHUB_ENV 85 | 86 | - name: Install wheel and extras 87 | run: python -m pip install "${{ env.WHEELNAME }}[all_extras,dev]" 88 | 89 | - name: Run tests 90 | run: | 91 | python -m pytest 92 | 93 | upload_wheels: 94 | name: Upload wheels to PyPI 95 | runs-on: ubuntu-latest 96 | needs: [build_wheels,test_wheels] 97 | 98 | permissions: 99 | id-token: write 100 | 101 | steps: 102 | - uses: actions/download-artifact@v7 103 | with: 104 | name: wheels 105 | path: wheelhouse 106 | 107 | - name: Publish package to PyPI 108 | uses: pypa/gh-action-pypi-publish@release/v1 109 | with: 110 | packages-dir: wheelhouse/ 111 | -------------------------------------------------------------------------------- /docs/source/api_reference/distributions.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _distributions_ref: 3 | 4 | Probability distributions 5 | ========================= 6 | 7 | The :mod:`skpro.distributions` module contains 8 | probability distributions which combine a ``pandas.DataFrame``-like API 9 | with a ``scikit-base`` compatible object interface. 10 | 11 | All distributions in ``skpro`` can be listed using the ``skpro.registry.all_objects`` utility, 12 | using ``object_types="distribution"``, optionally filtered by tags. 13 | Valid tags can be listed using ``skpro.registry.all_tags``. 14 | 15 | Base 16 | ---- 17 | 18 | .. currentmodule:: skpro.distributions.base 19 | 20 | .. autosummary:: 21 | :toctree: auto_generated/ 22 | :template: class.rst 23 | 24 | BaseDistribution 25 | 26 | Parametric distributions 27 | ------------------------ 28 | 29 | Continuous support - full reals 30 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 31 | 32 | .. currentmodule:: skpro.distributions 33 | 34 | .. 
autosummary:: 35 | :toctree: auto_generated/ 36 | :template: class.rst 37 | 38 | Laplace 39 | Logistic 40 | Normal 41 | SkewNormal 42 | TDistribution 43 | TruncatedNormal 44 | 45 | 46 | Continuous support - non-negative reals 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | .. currentmodule:: skpro.distributions 50 | 51 | .. autosummary:: 52 | :toctree: auto_generated/ 53 | :template: class.rst 54 | 55 | Alpha 56 | Beta 57 | ChiSquared 58 | Exponential 59 | Erlang 60 | Fisk 61 | Gamma 62 | LogGamma 63 | HalfCauchy 64 | HalfLogistic 65 | HalfNormal 66 | InverseGamma 67 | InverseGaussian 68 | LogLaplace 69 | Pareto 70 | Weibull 71 | 72 | 73 | Integer support 74 | ~~~~~~~~~~~~~~~ 75 | 76 | .. currentmodule:: skpro.distributions 77 | 78 | .. autosummary:: 79 | :toctree: auto_generated/ 80 | :template: class.rst 81 | 82 | Binomial 83 | Geometric 84 | Hurdle 85 | NegativeBinomial 86 | Poisson 87 | 88 | Non-parametric and empirical distributions 89 | ------------------------------------------ 90 | 91 | .. currentmodule:: skpro.distributions 92 | 93 | .. autosummary:: 94 | :toctree: auto_generated/ 95 | :template: class.rst 96 | 97 | Delta 98 | Empirical 99 | Histogram 100 | QPD_Empirical 101 | QPD_Johnson 102 | QPD_U 103 | QPD_S 104 | QPD_B 105 | 106 | 107 | Composite distributions 108 | ----------------------- 109 | 110 | Parametric families 111 | ~~~~~~~~~~~~~~~~~~~ 112 | 113 | .. currentmodule:: skpro.distributions 114 | 115 | .. autosummary:: 116 | :toctree: auto_generated/ 117 | :template: class.rst 118 | 119 | MeanScale 120 | TruncatedDistribution 121 | LeftTruncated 122 | 123 | Mixture composition 124 | ~~~~~~~~~~~~~~~~~~~ 125 | 126 | .. currentmodule:: skpro.distributions 127 | 128 | .. autosummary:: 129 | :toctree: auto_generated/ 130 | :template: class.rst 131 | 132 | Mixture 133 | 134 | Transformation composition 135 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 136 | 137 | .. currentmodule:: skpro.distributions 138 | 139 | .. 
class Exponential(_ScipyAdapter):
    r"""Exponential Distribution.

    Most methods wrap ``scipy.stats.expon``.

    The Exponential distribution is parametrized by the rate
    :math:`\lambda`, such that the pdf is

    .. math:: f(x) = \lambda*\exp\left(-\lambda*x\right)

    for :math:`x \geq 0`.

    The rate :math:`\lambda` is represented by the parameter ``rate``.

    Parameters
    ----------
    rate : float or array of float (1D or 2D)
        rate of the distribution
        rate = 1/scale
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.exponential import Exponential
    >>> d = Exponential(rate=2)
    """

    _tags = {
        "capabilities:approx": ["ppf", "pdfnorm"],
        "capabilities:exact": [
            "mean",
            "var",
            "pdf",
            "log_pdf",
            "cdf",
            "energy",
        ],
        "distr:measuretype": "continuous",
        "broadcast_init": "on",
    }

    def __init__(self, rate, index=None, columns=None):
        self.rate = rate

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the scipy distribution wrapped by this class."""
        return expon

    def _get_scipy_param(self):
        """Return positional and keyword arguments for ``scipy.stats.expon``.

        ``scipy`` parametrizes the exponential distribution by ``scale``,
        which is the reciprocal of ``rate``.
        """
        rate = self._bc_params["rate"]
        scale = 1 / rate
        return [], {"scale": scale}

    def _energy_self(self):
        r"""Energy of self, w.r.t. self.

        For Exponential(rate=λ), \mathbb{E}|X-Y| = 1/λ.

        Returns the per-entry energy summed over axis 1 when the
        broadcast ``rate`` has at least one dimension.
        """
        rate = self._bc_params["rate"]
        energy_arr = 1 / rate
        if energy_arr.ndim > 0:
            energy_arr = energy_arr.sum(axis=1)
        return energy_arr

    def _energy_x(self, x):
        r"""Energy of self, w.r.t. a constant frame x.

        Closed form for \mathbb{E}|X - x| with X ~ Exp(rate=λ):
        - if x < 0:  1/λ - x
        - if x >= 0: x - 1/λ + 2 e^{-λ x}/λ

        Parameters
        ----------
        x : array-like, broadcastable against the ``rate`` parameter
            points at which the energy is evaluated
            assumes a numpy array is passed by the caller - TODO confirm

        Returns
        -------
        energy_arr : per-entry energy, summed over axis 1 if not 0-dimensional
        """
        rate = self._bc_params["rate"]
        is_nonneg = x >= 0
        # Evaluate the x >= 0 branch only at clipped values. Multiplying masks
        # by both branches, as a plain piecewise product would, overflows
        # exp(-rate * x) for very negative x and yields 0 * inf = nan.
        x_clipped = np.where(is_nonneg, x, 0)
        nonneg_branch = x_clipped - 1 / rate + 2 * np.exp(-rate * x_clipped) / rate
        neg_branch = 1 / rate - x
        energy_arr = np.where(is_nonneg, nonneg_branch, neg_branch)
        if energy_arr.ndim > 0:
            energy_arr = energy_arr.sum(axis=1)
        else:
            # unwrap the 0-d array produced by np.where into a numpy scalar
            energy_arr = energy_arr[()]
        return energy_arr

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the distribution."""
        params1 = {"rate": [1, 2, 2.5, 3.5, 5]}
        params2 = {"rate": 2}
        params3 = {
            "rate": [
                [2, 2, 2],
                [4, 4, 4],
            ],
            "index": pd.Index([1, 2]),
            "columns": pd.Index(["a", "b", "c"]),
        }

        return [params1, params2, params3]
31 | "Operating System :: POSIX", 32 | "Operating System :: Unix", 33 | "Operating System :: MacOS", 34 | "Programming Language :: Python :: 3.10", 35 | "Programming Language :: Python :: 3.11", 36 | "Programming Language :: Python :: 3.12", 37 | "Programming Language :: Python :: 3.13", 38 | "Programming Language :: Python :: 3.14", 39 | ] 40 | requires-python = ">=3.9,<3.15" 41 | dependencies = [ 42 | "numpy>=1.21.0,<2.4", 43 | "pandas>=1.1.0,<2.4.0", 44 | "packaging", 45 | "scikit-base>=0.6.1,<0.14.0", 46 | "scikit-learn>=0.24.0,<1.8.0", 47 | "scipy<2.0.0,>=1.2.0", 48 | ] 49 | 50 | [project.optional-dependencies] 51 | all_extras = [ 52 | "distfit; python_version < '3.13'", 53 | "lifelines<0.31.0; python_version < '3.13'", 54 | "mapie; python_version < '3.13'", 55 | "matplotlib>=3.3.2", 56 | "ngboost<0.6.0; python_version < '3.13'", 57 | "polars<1.37.0", 58 | "pymc; python_version < '3.13'", 59 | "statsmodels>=0.12.1", 60 | ] 61 | 62 | dev = [ 63 | "backoff", 64 | "httpx", 65 | "pre-commit", 66 | "pytest", 67 | "pytest-cov", 68 | "pytest-randomly", 69 | "pytest-timeout", 70 | "pytest-xdist", 71 | "wheel", 72 | ] 73 | 74 | binder = [ 75 | "jupyter", 76 | ] 77 | 78 | docs = [ 79 | "jupyter", 80 | "myst-parser", 81 | "nbsphinx>=0.8.6", 82 | "numpydoc", 83 | "pydata-sphinx-theme", 84 | "sphinx!=7.2.0,<9.0.0", 85 | "sphinx-design<0.7.0", 86 | "sphinx-issues<6.0.0", 87 | "sphinx-gallery<0.20.0", 88 | "sphinx-panels", 89 | "tabulate", 90 | ] 91 | 92 | [project.urls] 93 | Homepage = "https://github.com/sktime/skpro" 94 | Repository = "https://github.com/sktime/skpro" 95 | Documentation = "https://github.com/sktime/skpro" 96 | Download = "https://pypi.org/project/skpro/#files" 97 | "API Reference" = "https://github.com/sktime/skpro" 98 | "Release Notes" = "https://github.com/sktime/skpro" 99 | 100 | [project.license] 101 | file = "LICENSE" 102 | 103 | [build-system] 104 | requires = ["setuptools>61", "wheel", "toml", "build"] 105 | build-backend = "setuptools.build_meta" 
class _ScipyAdapter(BaseDistribution):
    """Base adapter exposing scipy distributions through the skpro interface.

    Concrete subclasses supply the scipy distribution object via
    ``_get_scipy_object`` and its arguments via ``_get_scipy_param``;
    the private distribution methods below forward to the scipy methods
    of the same purpose. The class is abstract
    and should not be instantiated directly.
    """

    _distribution_attr = "_dist"
    _tags = {
        "object_type": "distribution",
        "distr:paramtype": "parametric",
    }

    def __init__(self, index=None, columns=None):
        # store the scipy object under the attribute named by _distribution_attr
        setattr(self, self._distribution_attr, self._get_scipy_object())
        super().__init__(index, columns)

    def _get_scipy_object(self) -> Union[rv_continuous, rv_discrete]:
        """Abstract method to get the scipy distribution object.

        Should import the scipy distribution object and return it.
        """
        raise NotImplementedError("abstract method")

    def _get_scipy_param(self):
        """Abstract method to get the scipy distribution parameters.

        Should return a tuple with two elements: a list of positional arguments (args)
        and a dictionary of keyword arguments (kwds).
        """
        raise NotImplementedError("abstract method")

    def _scipy_call(self, method_name, *leading):
        """Call a method of the wrapped scipy object with the scipy parameters.

        ``leading`` are positional arguments (e.g., evaluation points) placed
        in front of the args returned by ``_get_scipy_param``.
        """
        scipy_dist: Union[rv_continuous, rv_discrete] = getattr(
            self, self._distribution_attr
        )
        pos_args, kw_args = self._get_scipy_param()
        return getattr(scipy_dist, method_name)(*leading, *pos_args, **kw_args)

    def _mean(self):
        return self._scipy_call("mean")

    def _var(self):
        return self._scipy_call("var")

    def _pdf(self, x: pd.DataFrame):
        return self._scipy_call("pdf", x)

    def _log_pdf(self, x: pd.DataFrame):
        return self._scipy_call("logpdf", x)

    def _cdf(self, x: pd.DataFrame):
        return self._scipy_call("cdf", x)

    def _ppf(self, p: pd.DataFrame):
        return self._scipy_call("ppf", p)

    def _pmf(self, x: pd.DataFrame):
        """Return the probability mass function evaluated at x."""
        return self._scipy_call("pmf", x)

    def _log_pmf(self, x: pd.DataFrame):
        """Return the log of the probability mass function evaluated at x."""
        return self._scipy_call("logpmf", x)
-------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. _home: 2 | 3 | ================ 4 | Welcome to skpro 5 | ================ 6 | 7 | ``skpro`` is a library for supervised probabilistic prediction and 8 | tabular probability distributions in python. 9 | 10 | Features 11 | ======== 12 | 13 | ``skpro`` provides unified, ``sklearn`` and ``skbase`` compatible interfaces to: 14 | 15 | * tabular **supervised regressors for probabilistic prediction** - interval, quantile and distribution predictions 16 | * tabular **probabilistic time-to-event and survival prediction** - instance-individual survival distributions 17 | * **metrics to evaluate probabilistic predictions**, e.g., pinball loss, empirical coverage, CRPS 18 | * **reductions** to turn ``sklearn`` regressors into probabilistic ``skpro`` regressors, such as bootstrap or conformal 19 | * building **pipelines and composite models**, including tuning via probabilistic performance metrics 20 | * symbolic **probability distributions** with value domain of ``pandas.DataFrame``-s and ``pandas``-like interface 21 | 22 | Technical specification 23 | ======================= 24 | 25 | * In-memory computation on a single machine, no distributed computing 26 | * Medium-sized data in pandas and NumPy based containers 27 | * Modular, principled and object-oriented API 28 | * Using interactive Python interpreter, no command-line interface or graphical user interface 29 | 30 | Contents 31 | ======== 32 | 33 | .. toctree:: 34 | :maxdepth: 1 35 | :hidden: 36 | 37 | get_started 38 | users 39 | installation 40 | api_reference 41 | get_involved 42 | developer_guide 43 | about 44 | examples 45 | 46 | From here, you can navigate to: 47 | 48 | .. grid:: 1 2 2 2 49 | :gutter: 3 50 | 51 | .. grid-item-card:: Get Started 52 | :text-align: center 53 | 54 | Get started using ``skpro`` quickly. 
55 | 56 | +++ 57 | 58 | .. button-ref:: get_started 59 | :color: primary 60 | :click-parent: 61 | :expand: 62 | 63 | Get Started 64 | 65 | .. grid-item-card:: User Documentation 66 | :text-align: center 67 | 68 | Find user documentation. 69 | 70 | +++ 71 | 72 | .. button-ref:: users 73 | :color: primary 74 | :click-parent: 75 | :expand: 76 | 77 | Users 78 | 79 | .. grid-item-card:: API Reference 80 | :text-align: center 81 | 82 | Understand ``skpro``'s API. 83 | 84 | +++ 85 | 86 | .. button-ref:: api_reference 87 | :color: primary 88 | :click-parent: 89 | :expand: 90 | 91 | API Reference 92 | 93 | .. grid-item-card:: Get Involved 94 | :text-align: center 95 | 96 | Find out how you can contribute. 97 | 98 | +++ 99 | 100 | .. button-ref:: contribute 101 | :color: primary 102 | :click-parent: 103 | :expand: 104 | 105 | Get Involved 106 | 107 | .. grid-item-card:: Changelog 108 | :text-align: center 109 | 110 | See how the package has changed. 111 | 112 | +++ 113 | 114 | .. button-ref:: changelog 115 | :color: primary 116 | :click-parent: 117 | :expand: 118 | 119 | Changelog 120 | 121 | .. grid-item-card:: About 122 | :text-align: center 123 | 124 | Learn more about ``skpro``. 125 | 126 | +++ 127 | 128 | .. 
# shared skip condition: run only if skpro.utils changed and matplotlib is present
_skip_unless_plotting_testable = pytest.mark.skipif(
    not (
        run_test_module_changed("skpro.utils")
        and _check_soft_dependencies("matplotlib", severity="none")
    ),
    reason="skip test if required soft dependency for matplotlib not available",
)


def _fit_residual_double():
    """Fit a ResidualDouble regressor on the diabetes data.

    Returns the feature frame, the target, and the fitted regressor.
    Imports are kept local so that collecting this module stays cheap.
    """
    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LinearRegression

    from skpro.regression.residual import ResidualDouble

    X, y = load_diabetes(return_X_y=True, as_frame=True)
    model = ResidualDouble(LinearRegression(), RandomForestRegressor())
    model.fit(X, y)
    return X, y, model


@_skip_unless_plotting_testable
def test_plot_crossplot_interval():
    """Check that plot_crossplot_interval runs on proba and interval input."""
    _check_soft_dependencies("matplotlib")

    from skpro.utils.plotting import plot_crossplot_interval

    X, y, model = _fit_residual_double()
    dist_pred = model.predict_proba(X)

    # distribution predictions, with explicit and with default coverage
    plot_crossplot_interval(y, dist_pred, coverage=0.8)
    plot_crossplot_interval(y, dist_pred)

    # interval predictions passed directly
    interval_pred = model.predict_interval(X, coverage=0.7)
    plot_crossplot_interval(y, interval_pred)


@_skip_unless_plotting_testable
def test_plot_crossplot_std():
    """Check that plot_crossplot_std runs on proba and variance input."""
    _check_soft_dependencies("matplotlib")

    from skpro.utils.plotting import plot_crossplot_std

    X, y, model = _fit_residual_double()

    plot_crossplot_std(y, model.predict_proba(X))
    plot_crossplot_std(y, model.predict_var(X))


@_skip_unless_plotting_testable
def test_plot_crossplot_loss():
    """Check that plot_crossplot_loss runs with the CRPS metric."""
    _check_soft_dependencies("matplotlib")

    from skpro.metrics import CRPS
    from skpro.utils.plotting import plot_crossplot_loss

    X, y, model = _fit_residual_double()
    dist_pred = model.predict_proba(X)

    plot_crossplot_loss(y, dist_pred, CRPS())
class TruncatedNormal(_ScipyAdapter):
    """Normal distribution restricted to an interval.

    Most methods wrap ``scipy.stats.truncnorm``.
    The underlying normal distribution is truncated at
    the abscissae ``l_trunc`` and ``r_trunc``.

    Note: the truncation bounds are given on the original scale;
    internally they are shifted by ``mu`` and divided by ``sigma``
    before being passed to ``scipy``.

    Parameters
    ----------
    mu : float or array of float (1D or 2D)
        mean of the normal distribution
    sigma : float or array of float (1D or 2D), must be positive
        standard deviation of the normal distribution
    l_trunc : float or array of float (1D or 2D)
        Left truncation abscissa.
    r_trunc : float or array of float (1D or 2D)
        Right truncation abscissa.
    index : pd.Index, optional, default = RangeIndex
    columns : pd.Index, optional, default = RangeIndex

    Examples
    --------
    >>> from skpro.distributions.truncated_normal import TruncatedNormal

    >>> d = TruncatedNormal(
    ...     mu=[[0, 1], [2, 3], [4, 5]],
    ...     sigma=1,
    ...     l_trunc=[[-0.1, 0.5], [1.5, 2.4], [4.1, 5]],
    ...     r_trunc=[[0.8, 2], [4, 5], [5, 7]],
    ... )
    """

    _tags = {
        "capabilities:approx": ["energy", "pdfnorm"],
        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
        "distr:measuretype": "continuous",
        "distr:paramtype": "parametric",
        "broadcast_init": "on",
    }

    def __init__(self, mu, sigma, l_trunc, r_trunc, index=None, columns=None):
        self.mu = mu
        self.sigma = sigma
        self.l_trunc = l_trunc
        self.r_trunc = r_trunc

        super().__init__(index=index, columns=columns)

    def _get_scipy_object(self) -> rv_continuous:
        """Return the scipy distribution wrapped by this class."""
        return truncnorm

    def _get_scipy_param(self):
        """Return positional and keyword arguments for ``scipy.stats.truncnorm``."""
        bc = self._bc_params
        loc, scale = bc["mu"], bc["sigma"]

        # standardize the truncation bounds relative to loc and scale
        lower_std = (bc["l_trunc"] - loc) / scale
        upper_std = (bc["r_trunc"] - loc) / scale

        scipy_kwargs = {
            "loc": loc,
            "scale": scale,
            "a": lower_std,
            "b": upper_std,
        }
        return [], scipy_kwargs

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the distribution."""
        # 2D array parameters, scalar sigma
        array_params = {
            "mu": [[0, 1], [2, 3], [4, 5]],
            "sigma": 1,
            "l_trunc": [[-0.1, 0.5], [1.5, 2.4], [4.1, 5]],
            "r_trunc": [[0.8, 2], [4, 5], [5, 7]],
        }
        # 1D truncation bounds with explicit index and columns
        indexed_params = {
            "mu": 0,
            "sigma": 1,
            "l_trunc": [-10, -5],
            "r_trunc": [5, 10],
            "index": pd.Index([1, 2, 5]),
            "columns": pd.Index(["a", "b"]),
        }
        # all-scalar parameters
        scalar_params = {"mu": 1, "sigma": 2, "l_trunc": -3, "r_trunc": 5}
        return [array_params, indexed_params, scalar_params]