10 |
11 | Key Features
12 | ============
13 | - Scikit-learn compatible API
14 | - Three conformal prediction methods:
15 |   - CV+ (Cross-Validation+) [1]_ [2]_
16 |   - Jackknife+-after-Bootstrap [3]_
17 |   - Split Conformal [4]_
18 | - Efficient conformity score calculation with parallel processing support
19 | - Regularized set predictions for classification tasks [5]_
20 |
21 | Installation
22 | ============
23 |
24 | You can install **coverforest** using pip:
25 |
26 | .. code-block:: bash
27 |
28 |    pip install coverforest
29 |
30 | Requirements:
31 |
32 | - Python >=3.9
33 | - Scikit-learn >=1.6.0
34 |
35 | Quick Start
36 | ===========
37 |
38 | Classification Example
39 | ----------------------
40 | .. code-block:: python
41 |
42 |     from coverforest import CoverForestClassifier
43 |
44 |     clf = CoverForestClassifier(n_estimators=100, method='cv')  # using CV+
45 |     clf.fit(X_train, y_train)
46 |     y_pred, y_sets = clf.predict(X_test, alpha=0.05)  # 95% coverage sets
47 |
48 | Regression Example
49 | ------------------
50 | .. code-block:: python
51 |
52 |     from coverforest import CoverForestRegressor
53 |
54 |     reg = CoverForestRegressor(n_estimators=100, method='bootstrap')  # using J+-a-Bootstrap
55 |     reg.fit(X_train, y_train)
56 |     y_pred, y_intervals = reg.predict(X_test, alpha=0.05)  # 95% coverage intervals
57 |
58 |
59 | Performance Tips
60 | ================
61 |
62 | - Use the ``n_jobs`` parameter in ``fit()`` and ``predict()`` to control parallel processing (``n_jobs=-1`` uses all CPU cores)
63 | - For large test sets, consider batch processing to optimize memory usage when calculating conformity scores (see the sketch after this list)
64 | - The memory requirement for prediction scales with ``(n_train × n_test × n_classes)``
65 |
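A minimal batching sketch (illustrative only: it reuses the ``reg`` regressor from the Quick Start, and the number of batches is an assumption to be tuned to your memory budget):

.. code-block:: python

    import numpy as np

    preds, intervals = [], []
    for X_batch in np.array_split(X_test, 10):  # assumed batch count; tune to memory
        p, iv = reg.predict(X_batch, alpha=0.05)
        preds.append(p)
        intervals.append(iv)

    y_pred = np.concatenate(preds)
    y_intervals = np.vstack(intervals)  # (n_test, 2) array of interval endpoints
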
66 | References
67 | ==========
68 |
69 | .. [1] Romano, Y., Sesia, M., & Candès, E. J. (2020). Classification with Valid and Adaptive Coverage. NeurIPS 2020.
70 | .. [2] Barber, R. F., Candès, E. J., Ramdas, A., & Tibshirani, R. J. (2021). Predictive inference with the jackknife+. Ann. Statist. 49(1), 486-507.
71 | .. [3] Kim, B., Xu, C., & Barber, R. F. (2020). Predictive inference is free with the jackknife+-after-bootstrap. NeurIPS 2020.
72 | .. [4] Vovk, V., Nouretdinov, I., Manokhin, V., & Gammerman, A. (2018). Cross-conformal predictive distributions. COPA 2018, 37-51.
73 | .. [5] Angelopoulos, A. N., Bates, S., Jordan, M. I., & Malik, J. (2021). Uncertainty Sets for Image Classifiers using Conformal Prediction. ICLR 2021.
74 |
75 | .. toctree::
76 | :maxdepth: 3
77 | :hidden:
78 |
79 | classification/index
80 | regression/index
81 | api
82 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "setuptools_scm[toml]>=8", "Cython", "numpy>=1.26", "scikit-learn>=1.6.0"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "coverforest"
7 | version = "0.0.4"
8 | authors = [
9 | { name="Donlapark Ponnoprat", email="donlapark@gmail.com" },
10 | ]
11 | description = "A simple and fast sklearn-compatible implementation of conformal prediction with random forests for both classification and regression tasks."
12 | readme = "README.md"
13 | requires-python = ">=3.9"
14 | dependencies = [
15 | "scikit-learn>=1.6.0",
16 | ]
17 | classifiers = [
18 | "Programming Language :: Python :: 3",
19 | "Programming Language :: Python :: 3.9",
20 | "Programming Language :: Python :: 3.10",
21 | "Programming Language :: Python :: 3.11",
22 | "Programming Language :: Python :: 3.12",
23 | "License :: OSI Approved :: BSD License",
24 | "Operating System :: POSIX",
25 | "Operating System :: Unix",
26 | "Operating System :: MacOS",
27 | "Operating System :: Microsoft :: Windows",
28 | ]
29 |
30 | [project.urls]
31 | Homepage = "https://github.com/donlapark/coverforest"
32 | Issues = "https://github.com/donlapark/coverforest/issues"
33 |
34 | [tool.setuptools]
35 | packages.find.exclude = [
36 | "doc*",
37 | "coverforest*tests",
38 | "docs*",
39 | "notebooks*",
40 | "wheels",
41 | "wheelhouse",
42 | "images",
43 | "experiments*",
44 | ]
45 |
46 | [tool.setuptools_scm]
47 | version_file = "coverforest/_version.py"
48 |
49 | [tool.pixi.project]
50 | channels = ["conda-forge"]
51 | platforms = ["win-64", "linux-64", "osx-64", "osx-arm64"]
52 |
53 | [tool.pixi.dependencies]
54 | python = ">=3.9"
55 | scikit-learn = ">=1.6.0"
56 |
57 | [tool.pixi.pypi-dependencies]
58 | coverforest = { path=".", editable=true }
59 |
60 | [tool.pixi.feature.lint.dependencies]
61 | # The versions below should be aligned with those in `.pre-commit-config.yaml`
62 | black = "23.3.0"
63 | pre-commit = "4.0.1"
64 | ruff = "0.9.1"
65 |
66 | [tool.pixi.feature.lint.tasks]
67 | black = { cmd = "black --check --diff coverforest && black --check --diff examples" }
68 | ruff = { cmd = "ruff check --output-format=full coverforest && ruff check --output-format=full examples" }
69 | lint = { depends-on = ["black", "ruff"]}
70 |
71 | [tool.pixi.feature.test.dependencies]
72 | pytest = "*"
73 | pytest-cov = "*"
74 |
75 | [tool.pixi.feature.test.tasks]
76 | test = { cmd = "pytest -vsl --cov=coverforest --cov-report=xml coverforest" }
77 |
78 | [tool.pixi.feature.doc.dependencies]
79 | matplotlib = "*"
80 | numpydoc = "*"
81 | pandas = "*"
82 | sphinx-book-theme = "*"
83 | setuptools-scm = ">=8" # needed for the versioning
84 | sphinx = "*"
85 | sphinx-design = "*"
86 | sphinx-gallery = "*"
87 | sphinx-prompt = "*"
88 | sphinxcontrib-bibtex = "*"
89 | sphinx-copybutton = "*"
90 | myst-nb = "*"
91 |
92 | [tool.pixi.feature.doc.tasks]
93 | build-doc = { cmd = "make html", cwd = "doc" }
94 | clean-doc = { cmd = "rm -rf _build", cwd = "doc" }
95 |
96 | [tool.pixi.environments]
97 | doc = ["doc"]
98 | lint = ["lint"]
99 | test = ["test"]
100 | dev = ["doc", "lint", "test"]
101 |
102 | [tool.black]
103 | line-length = 88
104 | target_version = ['py38', 'py39', 'py310']
105 | preview = true
106 | exclude = '''
107 | /(
108 | \.eggs # exclude a few common directories in the
109 | | \.git # root of the project
110 | | \.vscode
111 | )/
112 | '''
113 | force-exclude = "coverforest/_version.py"
114 |
115 | [tool.ruff]
116 | # max line length for black
117 | line-length = 88
118 | target-version = "py38"
119 | exclude=[
120 | ".git",
121 | "__pycache__",
122 | "dist",
123 | "doc/_build",
124 | "doc/auto_examples",
125 | "experiments",
126 | "build",
127 | "coverforest/_version.py",
128 | ]
129 |
130 | [tool.ruff.lint]
131 | # all rules can be found here: https://beta.ruff.rs/docs/rules/
132 | select = ["E", "F", "W", "I"]
133 | ignore=[
134 | # space before : (needed for how black formats slicing)
135 | "E203",
136 | # do not assign a lambda expression, use a def
137 | "E731",
138 | # do not use variables named 'l', 'O', or 'I'
139 | "E741",
140 | ]
141 |
142 | [tool.ruff.lint.per-file-ignores]
143 | # It's fine not to put the import at the top of the file in the examples
144 | # folder.
145 | "examples/*"=["E402"]
146 | "doc/conf.py"=["E402"]
147 | "doc/_templates/numpydoc_docstring.py"=["F821", "W292"]
148 |
149 | [tool.pytest.ini_options]
150 | addopts = ["--import-mode=importlib",]
151 | doctest_optionflags = "NORMALIZE_WHITESPACE"
152 |
--------------------------------------------------------------------------------
/docs/_static/doctools.js:
--------------------------------------------------------------------------------
1 | /*
2 | * Base JavaScript utilities for all Sphinx HTML documentation.
3 | */
4 | "use strict";
5 |
6 | const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([
7 | "TEXTAREA",
8 | "INPUT",
9 | "SELECT",
10 | "BUTTON",
11 | ]);
12 |
13 | const _ready = (callback) => {
14 | if (document.readyState !== "loading") {
15 | callback();
16 | } else {
17 | document.addEventListener("DOMContentLoaded", callback);
18 | }
19 | };
20 |
21 | /**
22 | * Small JavaScript module for the documentation.
23 | */
24 | const Documentation = {
25 | init: () => {
26 | Documentation.initDomainIndexTable();
27 | Documentation.initOnKeyListeners();
28 | },
29 |
30 | /**
31 | * i18n support
32 | */
33 | TRANSLATIONS: {},
34 | PLURAL_EXPR: (n) => (n === 1 ? 0 : 1),
35 | LOCALE: "unknown",
36 |
37 | // gettext and ngettext don't access this so that the functions
38 | // can safely bound to a different name (_ = Documentation.gettext)
39 | gettext: (string) => {
40 | const translated = Documentation.TRANSLATIONS[string];
41 | switch (typeof translated) {
42 | case "undefined":
43 | return string; // no translation
44 | case "string":
45 | return translated; // translation exists
46 | default:
47 | return translated[0]; // (singular, plural) translation tuple exists
48 | }
49 | },
50 |
51 | ngettext: (singular, plural, n) => {
52 | const translated = Documentation.TRANSLATIONS[singular];
53 | if (typeof translated !== "undefined")
54 | return translated[Documentation.PLURAL_EXPR(n)];
55 | return n === 1 ? singular : plural;
56 | },
57 |
58 | addTranslations: (catalog) => {
59 | Object.assign(Documentation.TRANSLATIONS, catalog.messages);
60 | Documentation.PLURAL_EXPR = new Function(
61 | "n",
62 | `return (${catalog.plural_expr})`
63 | );
64 | Documentation.LOCALE = catalog.locale;
65 | },
66 |
67 | /**
68 | * helper function to focus on search bar
69 | */
70 | focusSearchBar: () => {
71 | document.querySelectorAll("input[name=q]")[0]?.focus();
72 | },
73 |
74 | /**
75 | * Initialise the domain index toggle buttons
76 | */
77 | initDomainIndexTable: () => {
78 | const toggler = (el) => {
79 | const idNumber = el.id.substr(7);
80 | const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`);
81 | if (el.src.substr(-9) === "minus.png") {
82 | el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`;
83 | toggledRows.forEach((el) => (el.style.display = "none"));
84 | } else {
85 | el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`;
86 | toggledRows.forEach((el) => (el.style.display = ""));
87 | }
88 | };
89 |
90 | const togglerElements = document.querySelectorAll("img.toggler");
91 | togglerElements.forEach((el) =>
92 | el.addEventListener("click", (event) => toggler(event.currentTarget))
93 | );
94 | togglerElements.forEach((el) => (el.style.display = ""));
95 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler);
96 | },
97 |
98 | initOnKeyListeners: () => {
99 | // only install a listener if it is really needed
100 | if (
101 | !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS &&
102 | !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS
103 | )
104 | return;
105 |
106 | document.addEventListener("keydown", (event) => {
107 | // bail for input elements
108 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
109 | // bail with special keys
110 | if (event.altKey || event.ctrlKey || event.metaKey) return;
111 |
112 | if (!event.shiftKey) {
113 | switch (event.key) {
114 | case "ArrowLeft":
115 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;
116 |
117 | const prevLink = document.querySelector('link[rel="prev"]');
118 | if (prevLink && prevLink.href) {
119 | window.location.href = prevLink.href;
120 | event.preventDefault();
121 | }
122 | break;
123 | case "ArrowRight":
124 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;
125 |
126 | const nextLink = document.querySelector('link[rel="next"]');
127 | if (nextLink && nextLink.href) {
128 | window.location.href = nextLink.href;
129 | event.preventDefault();
130 | }
131 | break;
132 | }
133 | }
134 |
135 | // some keyboard layouts may need Shift to get /
136 | switch (event.key) {
137 | case "/":
138 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break;
139 | Documentation.focusSearchBar();
140 | event.preventDefault();
141 | }
142 | });
143 | },
144 | };
145 |
146 | // quick alias for translations
147 | const _ = Documentation.gettext;
148 |
149 | _ready(Documentation.init);
150 |
--------------------------------------------------------------------------------
/experiments/experiment_classification_2_mapie.py:
--------------------------------------------------------------------------------
1 | """Benchmarking run time of MapieClassifier + RandomForestClassifier"""
2 |
3 | from mapie.classification import MapieClassifier
4 |
5 | from sklearn.ensemble import RandomForestClassifier
6 | from sklearn.model_selection import train_test_split
7 | import pandas as pd
8 | import numpy as np
9 | import csv
10 | import os
11 | from scipy.stats import mode
12 | import time
13 | from ucimlrepo import fetch_ucirepo
14 | from sklearn.preprocessing import StandardScaler
15 |
16 | import tensorflow as tf
17 |
18 | result_folder = "results_classification_time"
19 |
20 | if not os.path.exists(result_folder):
21 | os.makedirs(result_folder)
22 |
23 |
24 | rng = np.random.default_rng(1)
25 | randints = rng.integers(0, high=1e6, size=30)
26 | alpha = 0.05
27 |
28 |
29 | ################################
30 | # Training & Evaluation workflow
31 | ################################
32 |
33 |
34 | def experiment(data_name, X_train, y_train, X_test, y_test, method_list, params):
35 | for method, method_name, k_, lambda_ in method_list:
36 | echo = []
37 | train_times = []
38 | test_times = []
39 | for k in range(30):
40 | rf = RandomForestClassifier(n_estimators=params[method]["n_estimators"])
41 | rfcv = MapieClassifier(
42 | estimator=rf,
43 | method="aps",
44 | cv=method,
45 | test_size=0.3,
46 | random_state=randints[k],
47 | n_jobs=-1,
48 | )
49 | curr1 = time.time()
50 | rfcv.fit(X_train, y_train)
51 | curr2 = time.time()
52 | curr3 = time.time()
53 | _, y_pred = rfcv.predict(
54 | X_test,
55 | alpha=alpha,
56 | include_last_label="randomized",
57 | agg_scores="crossval",
58 | )
59 | curr4 = time.time()
60 | train_times.append(curr2 - curr1)
61 | test_times.append(curr4 - curr3)
62 | print("time", curr2 - curr1, curr4 - curr3)
63 | echo.append(k + 1)
64 | print("echo", k, ", alpha =", alpha)
65 |
66 | filename = f"{data_name}_{method_name}_MAPIE.csv"
67 |         with open(os.path.join(result_folder, filename), "w", newline="") as f:
68 |             write = csv.writer(f)
69 | write.writerows([echo, echo, echo, train_times, test_times])
70 |
71 |
72 | # Set up methods and parameters.
73 | method_list = [(10, "cv", 0, 0), ("split", "split", 0, 0)]
74 |
75 | param_dict = {10: {"n_estimators": 100}, "split": {"n_estimators": int(1000 * 0.9)}}
76 |
77 |
78 | ##############
79 | # Mice dataset
80 | ##############
81 |
82 | MiceData = pd.read_csv("data/MiceClean.csv")
83 |
84 | X = MiceData.drop(columns=["class", "MouseID"])
85 | y = MiceData["class"]
86 | y = y.values.reshape(-1, 1)
87 | y = y[:, 0]
88 |
89 | seed = 1
90 | X_train, X_test, y_train, y_test = train_test_split(
91 | X, y, test_size=0.20, stratify=y, random_state=seed
92 | )
93 |
94 | experiment("MiceData", X_train, y_train, X_test, y_test, method_list, param_dict)
95 |
96 |
97 | #####################
98 | # WineQuality dataset
99 | #####################
100 | wine_quality = pd.read_csv("data/winequality-white.csv", sep=";")
101 |
102 | seed = 123
103 |
104 | sample_wine = wine_quality
105 |
106 | X = sample_wine.drop(columns=["quality"])
107 | y = sample_wine["quality"]
108 | y = y.to_numpy()
109 |
110 | X_train, X_test, y_train, y_test = train_test_split(
111 | X, y, test_size=0.20, stratify=y, random_state=seed
112 | )
113 |
114 | # Normalize data to prevent numerical overflows.
115 | scaler = StandardScaler()
116 | X_train = scaler.fit_transform(X_train)
117 | X_test = scaler.transform(X_test)
118 |
119 | experiment("WineData", X_train, y_train, X_test, y_test, method_list, param_dict)
120 |
121 |
122 | ####################
123 | # Myocardial dataset
124 | ####################
125 |
126 | myocardial = fetch_ucirepo(id=579)
127 |
128 | X = myocardial.data.features
129 | y = myocardial.data.targets
130 |
131 | y = y["LET_IS"]
132 | y = y.to_numpy()
133 |
134 | seed = 1
135 |
136 | X_train, X_test, y_train, y_test = train_test_split(
137 | X, y, test_size=0.20, stratify=y, random_state=seed
138 | )
139 |
140 | experiment("MyocData", X_train, y_train, X_test, y_test, method_list, param_dict)
141 |
142 |
143 | ###############
144 | # MNIST dataset
145 | ###############
146 |
147 | (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
148 |
149 | X_train = X_train.reshape((X_train.shape[0], -1))
150 | X_test = X_test.reshape((X_test.shape[0], -1))
151 |
152 | X_train = X_train.astype("float32")
153 | X_test = X_test.astype("float32")
154 |
155 | X_train = X_train / 255
156 | X_test = X_test / 255
157 |
158 | X_train = X_train[0:5000]
159 | y_train = y_train[0:5000]
160 |
161 | X_test = X_test[0:1250]
162 | y_test = y_test[0:1250]
163 |
164 | experiment("MNISTData", X_train, y_train, X_test, y_test, method_list, param_dict)
165 |
--------------------------------------------------------------------------------
/experiments/experiment_regression_2_ours.py:
--------------------------------------------------------------------------------
1 | """Benchmarking run time of CoverForestRegressor"""
2 |
3 | from coverforest import CoverForestRegressor
4 | from sklearn.model_selection import train_test_split
5 | import pandas as pd
6 | import numpy as np
7 | import csv
8 | import os
9 | import time
10 | from sklearn.datasets import fetch_california_housing
11 |
12 | from urllib.request import urlretrieve
13 | import zipfile
14 |
15 |
16 | result_folder = "results_regression_time"
17 |
18 | if not os.path.exists(result_folder):
19 | os.makedirs(result_folder)
20 |
21 | rng = np.random.default_rng(2)
22 | randints = rng.integers(0, high=1e6, size=30)
23 | alpha = 0.05
24 |
25 |
26 | ################################
27 | # Training & Evaluation workflow
28 | ################################
29 |
30 |
31 | def experiment(data_name, X_train, y_train, X_test, y_test, method_list, params):
32 | for method, k_, lambda_ in method_list:
33 | n_preds = []
34 | coverage_probs = []
35 | echo = []
36 | train_times = []
37 | test_times = []
38 | for k in range(30):
39 | rfreg = CoverForestRegressor(
40 | n_estimators=params[method]["n_estimators"],
41 | method=method,
42 | cv=10,
43 | n_jobs=-1,
44 | random_state=randints[k],
45 | )
46 | curr1 = time.time()
47 | rfreg.fit(X_train, y_train, alpha=alpha)
48 | curr2 = time.time()
49 | curr3 = time.time()
50 | y_pred, intervals = rfreg.predict(X_test, alpha=alpha)
51 | curr4 = time.time()
52 | train_times.append(curr2 - curr1)
53 | test_times.append(curr4 - curr3)
54 | print("Training + prediction time:", curr2 - curr1, curr4 - curr3)
55 | avg_size = np.mean(intervals[:, 1] - intervals[:, 0])
56 | cvg_prob = np.mean(
57 | (intervals[:, 0] <= y_test) & (y_test <= intervals[:, 1])
58 | )
59 | n_preds.append(avg_size)
60 | coverage_probs.append(cvg_prob)
61 | echo.append(k + 1)
62 | print("echo", k, ", alpha =", alpha)
63 | print(f"average size = {avg_size}, coverage = {cvg_prob}.")
64 |
65 | method_ = method.replace("_", "")
66 | filename = f"{data_name}_{method_}_Ours.csv"
67 |         with open(os.path.join(result_folder, filename), "w", newline="") as f:
68 |             write = csv.writer(f)
69 | write.writerows([echo, n_preds, coverage_probs, train_times, test_times])
70 |
71 |
72 | # Set up methods and parameters.
73 | method_list = [("bootstrap", 0, 0), ("cv", 0, 0), ("split", 0, 0)]
74 |
75 | param_dict = {
76 | "bootstrap": {"n_estimators": int(1000 * 0.9)},
77 | "cv": {"n_estimators": 100},
78 | "split": {"n_estimators": int(1000 * 0.9)},
79 | }
80 |
81 |
82 | #################
83 | # Housing dataset
84 | #################
85 |
86 | housing = fetch_california_housing()
87 | X = pd.DataFrame(housing.data, columns=housing.feature_names)
88 | y = housing.target
89 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
90 |
91 | experiment("HousingData", X_train, y_train, X_test, y_test, method_list, param_dict)
92 |
93 |
94 | ##################
95 | # Concrete dataset
96 | ##################
97 |
98 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"
99 | data = pd.read_excel(url)
100 | X = data.iloc[:, :-1]
101 | y = data.iloc[:, -1]
102 | X_train, X_test, y_train, y_test = train_test_split(
103 | X, y, test_size=0.2, random_state=42
104 | )
105 |
106 | experiment("ConcreteData", X_train, y_train, X_test, y_test, method_list, param_dict)
107 |
108 |
109 | ##############
110 | # Bike dataset
111 | ##############
112 |
113 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip"
114 |
115 | urlretrieve(url, "bike_sharing.zip")
116 | with zipfile.ZipFile("bike_sharing.zip", "r") as zip_ref:
117 | zip_ref.extractall("bike_data")
118 |
119 | data = pd.read_csv("bike_data/hour.csv")
120 |
121 | X = data.drop(["instant", "dteday", "casual", "registered", "cnt"], axis=1)
122 | y = data["cnt"] # total count as target
123 |
124 | X_train, X_test, y_train, y_test = train_test_split(
125 | X, y, test_size=0.2, random_state=42
126 | )
127 |
128 | experiment("BikeData", X_train, y_train, X_test, y_test, method_list, param_dict)
129 |
130 |
131 | ###############
132 | # Crime dataset
133 | ###############
134 |
135 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/communities/communities.data"
136 |
137 | data = pd.read_csv("communities.data", header=None, na_values=["?"])
138 |
139 | X = data.iloc[:, 5:-1]
140 | y = data.iloc[:, -1] # violent crimes per population
141 |
142 | X_train, X_test, y_train, y_test = train_test_split(
143 | X, y, test_size=0.2, random_state=42
144 | )
145 |
146 | experiment("CrimeData", X_train, y_train, X_test, y_test, method_list, param_dict)
147 |
--------------------------------------------------------------------------------
/experiments/experiment_regression_2_mapie.py:
--------------------------------------------------------------------------------
1 | """Benchmarking run time of MapieRegressor + RandomForestRegressor"""
2 |
3 | from mapie.regression import MapieRegressor
4 | from mapie.subsample import Subsample
5 | from sklearn.tree import DecisionTreeRegressor
6 | from sklearn.ensemble import RandomForestRegressor
7 | from sklearn.model_selection import train_test_split
8 | import pandas as pd
9 | import numpy as np
10 | import csv
11 | import os
12 | import time
13 | from sklearn.datasets import fetch_california_housing
14 |
15 | from urllib.request import urlretrieve
16 | import zipfile
17 |
18 | result_folder = "results_regression_time"
19 |
20 | if not os.path.exists(result_folder):
21 | os.makedirs(result_folder)
22 |
23 | rng = np.random.default_rng(2)
24 | randints = rng.integers(0, high=1e6, size=50)
25 | alpha = 0.05
26 |
27 |
28 | ################################
29 | # Training & Evaluation workflow
30 | ################################
31 |
32 |
33 | def experiment(data_name, X_train, y_train, X_test, y_test, method_list, params):
34 | for method_name, method, cv in method_list:
35 | echo = []
36 | train_times = []
37 | test_times = []
38 |
39 | for k in range(30):
40 |             if method_name == "bootstrap":
41 | estimator = DecisionTreeRegressor(max_features="sqrt")
42 | else:
43 | estimator = RandomForestRegressor(
44 |                     n_estimators=params[method_name]["n_estimators"]
45 | )
46 | rfreg = MapieRegressor(
47 | estimator=estimator,
48 | method=method,
49 | cv=cv,
50 | test_size=0.3,
51 | random_state=randints[k],
52 | n_jobs=-1,
53 | )
54 | curr1 = time.time()
55 | rfreg.fit(X_train, y_train)
56 | curr2 = time.time()
57 | curr3 = time.time()
58 | _, intervals = rfreg.predict(X_test, alpha=alpha, ensemble=True)
59 | curr4 = time.time()
60 | intervals = np.squeeze(intervals)
61 | train_times.append(curr2 - curr1)
62 | test_times.append(curr4 - curr3)
63 | print("time", curr2 - curr1, curr4 - curr3)
64 | echo.append(k + 1)
65 | print("echo", k, ", alpha =", alpha)
66 |
67 | filename = f"{data_name}_{method_name}_MAPIE.csv"
68 |         with open(os.path.join(result_folder, filename), "w", newline="") as f:
69 |             write = csv.writer(f)
70 | write.writerows([echo, echo, echo, train_times, test_times])
71 |
72 |
73 | # Set up methods and parameters.
74 | method_list = [
75 | ("bootstrap", "plus", Subsample(n_resamplings=int(1000 * 0.9))),
76 | ("cv", "plus", 10),
77 | ("split", "base", "split"),
78 | ]
79 |
80 | param_dict = {"cv": {"n_estimators": 100}, "split": {"n_estimators": int(1000 * 0.9)}}
81 |
82 |
83 | #################
84 | # Housing dataset
85 | #################
86 |
87 | housing = fetch_california_housing()
88 | X = pd.DataFrame(housing.data, columns=housing.feature_names)
89 | y = housing.target
90 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
91 |
92 | experiment("HousingData", X_train, y_train, X_test, y_test, method_list, param_dict)
93 |
94 |
95 | ##################
96 | # Concrete dataset
97 | ##################
98 |
99 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"
100 | data = pd.read_excel(url)
101 | X = data.iloc[:, :-1]
102 | y = data.iloc[:, -1]
103 | X_train, X_test, y_train, y_test = train_test_split(
104 | X, y, test_size=0.2, random_state=42
105 | )
106 |
107 | experiment("ConcreteData", X_train, y_train, X_test, y_test, method_list, param_dict)
108 |
109 |
110 | ##############
111 | # Bike dataset
112 | ##############
113 |
114 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip"
115 |
116 | urlretrieve(url, "bike_sharing.zip")
117 | with zipfile.ZipFile("bike_sharing.zip", "r") as zip_ref:
118 | zip_ref.extractall("bike_data")
119 |
120 | data = pd.read_csv("bike_data/hour.csv")
121 |
122 | X = data.drop(["instant", "dteday", "casual", "registered", "cnt"], axis=1)
123 | y = data["cnt"] # total count as target
124 |
125 | X_train, X_test, y_train, y_test = train_test_split(
126 | X, y, test_size=0.2, random_state=42
127 | )
128 |
129 | experiment("BikeData", X_train, y_train, X_test, y_test, method_list, param_dict)
130 |
131 |
132 | ###############
133 | # Crime dataset
134 | ###############
135 |
136 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/communities/communities.data"
137 |
138 | data = pd.read_csv("communities.data", header=None, na_values=["?"])
139 |
140 | X = data.iloc[:, 5:-1]
141 | y = data.iloc[:, -1] # violent crimes per population
142 |
143 | X_train, X_test, y_train, y_test = train_test_split(
144 | X, y, test_size=0.2, random_state=42
145 | )
146 |
147 | experiment("CrimeData", X_train, y_train, X_test, y_test, method_list, param_dict)
148 |
--------------------------------------------------------------------------------
/experiments/experiment_regression_1.py:
--------------------------------------------------------------------------------
1 | """Benchmarking CoverForestRegressor's coverage and average interval length"""
2 |
3 | from coverforest import CoverForestRegressor
4 | from sklearn.model_selection import train_test_split
5 | import pandas as pd
6 | import numpy as np
7 | import csv
8 | import os
9 | import time
10 | from sklearn.datasets import fetch_california_housing
11 |
12 | from urllib.request import urlretrieve
13 | import zipfile
14 |
15 | result_folder = "results_regression"
16 |
17 | if not os.path.exists(result_folder):
18 | os.makedirs(result_folder)
19 |
20 | rng = np.random.default_rng(2)
21 | randints = rng.integers(0, high=1e6, size=50)
22 |
23 |
24 | ################################
25 | # Training & Evaluation workflow
26 | ################################
27 |
28 |
29 | def experiment(data_name, X_train, y_train, X_test, y_test, method_list, params):
30 | for method, k_, lambda_ in method_list:
31 | for alpha in [0.2, 0.1, 0.05]:
32 | n_preds = []
33 | coverage_probs = []
34 | echo = []
35 | train_times = []
36 | test_times = []
37 | for k in range(50):
38 | rfreg = CoverForestRegressor(
39 | n_estimators=params[method]["n_estimators"],
40 | method=method,
41 | cv=10,
42 | n_jobs=-1,
43 | random_state=randints[k],
44 | )
45 | curr1 = time.time()
46 | rfreg.fit(X_train, y_train, alpha=alpha)
47 | curr2 = time.time()
48 | curr3 = time.time()
49 | y_pred, intervals = rfreg.predict(X_test, alpha=alpha)
50 | curr4 = time.time()
51 | train_times.append(curr2 - curr1)
52 | test_times.append(curr4 - curr3)
53 | print("Training + prediction time:", curr2 - curr1, curr4 - curr3)
54 | avg_size = np.mean(intervals[:, 1] - intervals[:, 0])
55 | cvg_prob = np.mean(
56 | (intervals[:, 0] <= y_test) & (y_test <= intervals[:, 1])
57 | )
58 | n_preds.append(avg_size)
59 | coverage_probs.append(cvg_prob)
60 | echo.append(k + 1)
61 | print("echo", k, ", alpha =", alpha)
62 | print(f"average size = {avg_size}, coverage = {cvg_prob}.")
63 |
64 | method_ = method.replace("_", "")
65 | filename = f"{data_name}_{method_}_{k_}_{alpha}.csv"
66 |             with open(os.path.join(result_folder, filename), "w", newline="") as f:
67 |                 write = csv.writer(f)
68 | write.writerows(
69 | [echo, n_preds, coverage_probs, train_times, test_times]
70 | )
71 |
72 |
73 | # Set up methods and parameters.
74 | method_list = [("bootstrap", 0, 0), ("cv", 0, 0), ("split", 0, 0)]
75 |
76 | param_dict = {
77 | "bootstrap": {"n_estimators": int(1000 * 0.9)},
78 | "cv": {"n_estimators": 100},
79 | "split": {"n_estimators": int(1000 * 0.9)},
80 | }
81 |
82 |
83 | #################
84 | # Housing dataset
85 | #################
86 |
87 | housing = fetch_california_housing()
88 | X = pd.DataFrame(housing.data, columns=housing.feature_names)[:5000]
89 | y = housing.target[:5000]
90 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
91 |
92 | experiment("HousingData", X_train, y_train, X_test, y_test, method_list, param_dict)
93 |
94 |
95 | ##################
96 | # Concrete dataset
97 | ##################
98 |
99 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"
100 | data = pd.read_excel(url)
101 | X = data.iloc[:, :-1]
102 | y = data.iloc[:, -1]
103 | X_train, X_test, y_train, y_test = train_test_split(
104 | X, y, test_size=0.2, random_state=42
105 | )
106 |
107 | experiment("ConcreteData", X_train, y_train, X_test, y_test, method_list, param_dict)
108 |
109 |
110 | ##############
111 | # Bike dataset
112 | ##############
113 |
114 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip"
115 |
116 | urlretrieve(url, "bike_sharing.zip")
117 | with zipfile.ZipFile("bike_sharing.zip", "r") as zip_ref:
118 | zip_ref.extractall("bike_data")
119 |
120 | data = pd.read_csv("bike_data/hour.csv").iloc[:5000]
121 |
122 | X = data.drop(["instant", "dteday", "casual", "registered", "cnt"], axis=1)
123 | y = data["cnt"] # total count as target
124 |
125 | X_train, X_test, y_train, y_test = train_test_split(
126 | X, y, test_size=0.2, random_state=42
127 | )
128 |
129 | experiment("BikeData", X_train, y_train, X_test, y_test, method_list, param_dict)
130 |
131 |
132 | ###############
133 | # Crime dataset
134 | ###############
135 |
136 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/communities/communities.data"
137 |
138 | data = pd.read_csv("communities.data", header=None, na_values=["?"])
139 |
140 | X = data.iloc[:, 5:-1]
141 | y = data.iloc[:, -1] # violent crimes per population
142 |
143 | X_train, X_test, y_train, y_test = train_test_split(
144 | X, y, test_size=0.2, random_state=42
145 | )
146 |
147 | experiment("CrimeData", X_train, y_train, X_test, y_test, method_list, param_dict)
148 |
--------------------------------------------------------------------------------
/experiments/experiment_classification_2_ours.py:
--------------------------------------------------------------------------------
1 | """Benchmarking run time of CoverForestClassifier"""
2 |
3 | from coverforest import CoverForestClassifier
4 |
5 | from sklearn.model_selection import train_test_split
6 | import pandas as pd
7 | import numpy as np
8 | import csv
9 | import os
10 | from scipy.stats import mode
11 | import time
12 | from ucimlrepo import fetch_ucirepo
13 | from sklearn.preprocessing import StandardScaler
14 |
15 | import tensorflow as tf
16 |
17 | result_folder = "results_classification_time"
18 |
19 | if not os.path.exists(result_folder):
20 | os.makedirs(result_folder)
21 |
22 | rng = np.random.default_rng(2)
23 | randints = rng.integers(0, high=1e6, size=30)
24 | alpha = 0.05
25 |
26 |
27 | ################################
28 | # Training & Evaluation workflow
29 | ################################
30 |
31 |
32 | def experiment(data_name, X_train, y_train, X_test, y_test, method_list, params):
33 | for method, k_, lambda_ in method_list:
34 | n_preds = []
35 | coverage_probs = []
36 | echo = []
37 | train_times = []
38 | test_times = []
39 | for k in range(30):
40 | rfclf = CoverForestClassifier(
41 | n_estimators=params[method]["n_estimators"],
42 | method=method,
43 | allow_empty_sets=True,
44 | cv=10,
45 | k_init=k_,
46 | lambda_init=lambda_,
47 | n_jobs=-1,
48 | random_state=randints[k],
49 | )
50 | curr1 = time.time()
51 | rfclf.fit(X_train, y_train, alpha=alpha)
52 | curr2 = time.time()
53 | curr3 = time.time()
54 | _, y_pred = rfclf.predict(X_test, alpha=alpha, binary_output=False)
55 | curr4 = time.time()
56 | train_times.append(curr2 - curr1)
57 | test_times.append(curr4 - curr3)
58 | print("Training time:", curr2 - curr1, curr4 - curr3)
59 | avg_size = np.mean([y_pred[i].shape[0] for i in range(len(y_pred))])
60 | cvg_prob = np.mean([y_test[i] in y_pred[i] for i in range(len(y_pred))])
61 | n_preds.append(avg_size)
62 | coverage_probs.append(cvg_prob)
63 | echo.append(k + 1)
64 | print("echo", k, ", alpha =", alpha)
65 | print(f"average size = {avg_size}, coverage = {cvg_prob}.")
66 |
67 | method_ = method.replace("_", "")
68 | filename = f"{data_name}_{method_}_Ours.csv"
69 |         with open(os.path.join(result_folder, filename), "w", newline="") as f:
70 |             write = csv.writer(f)
71 | write.writerows([echo, n_preds, coverage_probs, train_times, test_times])
72 |
73 |
74 | # Set up methods and parameters.
75 | method_list = [("bootstrap", 0, 0), ("cv", 0, 0), ("split", 0, 0)]
76 |
77 | param_dict = {
78 | "bootstrap": {"n_estimators": int(1000 * 0.9)},
79 | "cv": {"n_estimators": 100},
80 | "split": {"n_estimators": int(1000 * 0.9)},
81 | }
82 |
83 |
84 | ##############
85 | # Mice dataset
86 | ##############
87 |
88 | MiceData = pd.read_csv("data/MiceClean.csv")
89 |
90 | X = MiceData.drop(columns=["class", "MouseID"])
91 | y = MiceData["class"]
92 | y = y.values.reshape(-1, 1)
93 | y = y[:, 0]
94 |
95 | seed = 1
96 | X_train, X_test, y_train, y_test = train_test_split(
97 | X, y, test_size=0.20, stratify=y, random_state=seed
98 | )
99 |
100 | experiment("MiceData", X_train, y_train, X_test, y_test, method_list, param_dict)
101 |
102 |
103 | #####################
104 | # WineQuality dataset
105 | #####################
106 | wine_quality = pd.read_csv("data/winequality-white.csv", sep=";")
107 |
108 | seed = 123
109 |
110 | sample_wine = wine_quality
111 |
112 | X = sample_wine.drop(columns=["quality"])
113 | y = sample_wine["quality"]
114 | y = y.to_numpy()
115 |
116 | X_train, X_test, y_train, y_test = train_test_split(
117 | X, y, test_size=0.20, stratify=y, random_state=seed
118 | )
119 |
120 | # Normalize data to prevent numerical overflows.
121 | scaler = StandardScaler()
122 | X_train = scaler.fit_transform(X_train)
123 | X_test = scaler.transform(X_test)
124 |
125 | experiment("WineData", X_train, y_train, X_test, y_test, method_list, param_dict)
126 |
127 |
128 | ####################
129 | # Myocardial dataset
130 | ####################
131 |
132 | myocardial = fetch_ucirepo(id=579)
133 |
134 | X = myocardial.data.features
135 | y = myocardial.data.targets
136 |
137 | y = y["LET_IS"]
138 | y = y.to_numpy()
139 |
140 | seed = 1
141 |
142 | X_train, X_test, y_train, y_test = train_test_split(
143 | X, y, test_size=0.20, stratify=y, random_state=seed
144 | )
145 |
146 | experiment("MyocData", X_train, y_train, X_test, y_test, method_list, param_dict)
147 |
148 |
149 | ###############
150 | # MNIST dataset
151 | ###############
152 |
153 | (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
154 |
155 | X_train = X_train.reshape((X_train.shape[0], -1))
156 | X_test = X_test.reshape((X_test.shape[0], -1))
157 |
158 | X_train = X_train.astype("float32")
159 | X_test = X_test.astype("float32")
160 |
161 | X_train = X_train / 255
162 | X_test = X_test / 255
163 |
164 | X_train = X_train[0:5000]
165 | y_train = y_train[0:5000]
166 |
167 | X_test = X_test[0:1250]
168 | y_test = y_test[0:1250]
169 |
170 | experiment("MNISTData", X_train, y_train, X_test, y_test, method_list, param_dict)
171 |
--------------------------------------------------------------------------------
/docs/_static/language_data.js:
--------------------------------------------------------------------------------
1 | /*
2 | * This script contains the language-specific data used by searchtools.js,
3 | * namely the list of stopwords, stemmer, scorer and splitter.
4 | */
5 |
6 | var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
7 |
8 |
9 | /* Non-minified version is copied as a separate JS file, if available */
10 |
11 | /**
12 | * Porter Stemmer
13 | */
14 | var Stemmer = function() {
15 |
16 | var step2list = {
17 | ational: 'ate',
18 | tional: 'tion',
19 | enci: 'ence',
20 | anci: 'ance',
21 | izer: 'ize',
22 | bli: 'ble',
23 | alli: 'al',
24 | entli: 'ent',
25 | eli: 'e',
26 | ousli: 'ous',
27 | ization: 'ize',
28 | ation: 'ate',
29 | ator: 'ate',
30 | alism: 'al',
31 | iveness: 'ive',
32 | fulness: 'ful',
33 | ousness: 'ous',
34 | aliti: 'al',
35 | iviti: 'ive',
36 | biliti: 'ble',
37 | logi: 'log'
38 | };
39 |
40 | var step3list = {
41 | icate: 'ic',
42 | ative: '',
43 | alize: 'al',
44 | iciti: 'ic',
45 | ical: 'ic',
46 | ful: '',
47 | ness: ''
48 | };
49 |
50 | var c = "[^aeiou]"; // consonant
51 | var v = "[aeiouy]"; // vowel
52 | var C = c + "[^aeiouy]*"; // consonant sequence
53 | var V = v + "[aeiou]*"; // vowel sequence
54 |
55 | var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
56 | var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
57 | var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
58 | var s_v = "^(" + C + ")?" + v; // vowel in stem
59 |
60 | this.stemWord = function (w) {
61 | var stem;
62 | var suffix;
63 | var firstch;
64 | var origword = w;
65 |
66 | if (w.length < 3)
67 | return w;
68 |
69 | var re;
70 | var re2;
71 | var re3;
72 | var re4;
73 |
74 | firstch = w.substr(0,1);
75 | if (firstch == "y")
76 | w = firstch.toUpperCase() + w.substr(1);
77 |
78 | // Step 1a
79 | re = /^(.+?)(ss|i)es$/;
80 | re2 = /^(.+?)([^s])s$/;
81 |
82 | if (re.test(w))
83 | w = w.replace(re,"$1$2");
84 | else if (re2.test(w))
85 | w = w.replace(re2,"$1$2");
86 |
87 | // Step 1b
88 | re = /^(.+?)eed$/;
89 | re2 = /^(.+?)(ed|ing)$/;
90 | if (re.test(w)) {
91 | var fp = re.exec(w);
92 | re = new RegExp(mgr0);
93 | if (re.test(fp[1])) {
94 | re = /.$/;
95 | w = w.replace(re,"");
96 | }
97 | }
98 | else if (re2.test(w)) {
99 | var fp = re2.exec(w);
100 | stem = fp[1];
101 | re2 = new RegExp(s_v);
102 | if (re2.test(stem)) {
103 | w = stem;
104 | re2 = /(at|bl|iz)$/;
105 | re3 = new RegExp("([^aeiouylsz])\\1$");
106 | re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
107 | if (re2.test(w))
108 | w = w + "e";
109 | else if (re3.test(w)) {
110 | re = /.$/;
111 | w = w.replace(re,"");
112 | }
113 | else if (re4.test(w))
114 | w = w + "e";
115 | }
116 | }
117 |
118 | // Step 1c
119 | re = /^(.+?)y$/;
120 | if (re.test(w)) {
121 | var fp = re.exec(w);
122 | stem = fp[1];
123 | re = new RegExp(s_v);
124 | if (re.test(stem))
125 | w = stem + "i";
126 | }
127 |
128 | // Step 2
129 | re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
130 | if (re.test(w)) {
131 | var fp = re.exec(w);
132 | stem = fp[1];
133 | suffix = fp[2];
134 | re = new RegExp(mgr0);
135 | if (re.test(stem))
136 | w = stem + step2list[suffix];
137 | }
138 |
139 | // Step 3
140 | re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
141 | if (re.test(w)) {
142 | var fp = re.exec(w);
143 | stem = fp[1];
144 | suffix = fp[2];
145 | re = new RegExp(mgr0);
146 | if (re.test(stem))
147 | w = stem + step3list[suffix];
148 | }
149 |
150 | // Step 4
151 | re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
152 | re2 = /^(.+?)(s|t)(ion)$/;
153 | if (re.test(w)) {
154 | var fp = re.exec(w);
155 | stem = fp[1];
156 | re = new RegExp(mgr1);
157 | if (re.test(stem))
158 | w = stem;
159 | }
160 | else if (re2.test(w)) {
161 | var fp = re2.exec(w);
162 | stem = fp[1] + fp[2];
163 | re2 = new RegExp(mgr1);
164 | if (re2.test(stem))
165 | w = stem;
166 | }
167 |
168 | // Step 5
169 | re = /^(.+?)e$/;
170 | if (re.test(w)) {
171 | var fp = re.exec(w);
172 | stem = fp[1];
173 | re = new RegExp(mgr1);
174 | re2 = new RegExp(meq1);
175 | re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
176 | if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
177 | w = stem;
178 | }
179 | re = /ll$/;
180 | re2 = new RegExp(mgr1);
181 | if (re.test(w) && re2.test(w)) {
182 | re = /.$/;
183 | w = w.replace(re,"");
184 | }
185 |
186 | // and turn initial Y back to y
187 | if (firstch == "y")
188 | w = firstch.toLowerCase() + w.substr(1);
189 | return w;
190 | }
191 | }
192 |
--------------------------------------------------------------------------------
/docs/_static/sphinx_highlight.js:
--------------------------------------------------------------------------------
1 | /* Highlighting utilities for Sphinx HTML documentation. */
2 | "use strict";
3 |
4 | const SPHINX_HIGHLIGHT_ENABLED = true
5 |
6 | /**
7 | * highlight a given string on a node by wrapping it in
8 | * span elements with the given class name.
9 | */
10 | const _highlight = (node, addItems, text, className) => {
11 | if (node.nodeType === Node.TEXT_NODE) {
12 | const val = node.nodeValue;
13 | const parent = node.parentNode;
14 | const pos = val.toLowerCase().indexOf(text);
15 | if (
16 | pos >= 0 &&
17 | !parent.classList.contains(className) &&
18 | !parent.classList.contains("nohighlight")
19 | ) {
20 | let span;
21 |
22 | const closestNode = parent.closest("body, svg, foreignObject");
23 | const isInSVG = closestNode && closestNode.matches("svg");
24 | if (isInSVG) {
25 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
26 | } else {
27 | span = document.createElement("span");
28 | span.classList.add(className);
29 | }
30 |
31 | span.appendChild(document.createTextNode(val.substr(pos, text.length)));
32 | const rest = document.createTextNode(val.substr(pos + text.length));
33 | parent.insertBefore(
34 | span,
35 | parent.insertBefore(
36 | rest,
37 | node.nextSibling
38 | )
39 | );
40 | node.nodeValue = val.substr(0, pos);
41 | /* There may be more occurrences of search term in this node. So call this
42 | * function recursively on the remaining fragment.
43 | */
44 | _highlight(rest, addItems, text, className);
45 |
46 | if (isInSVG) {
47 | const rect = document.createElementNS(
48 | "http://www.w3.org/2000/svg",
49 | "rect"
50 | );
51 | const bbox = parent.getBBox();
52 | rect.x.baseVal.value = bbox.x;
53 | rect.y.baseVal.value = bbox.y;
54 | rect.width.baseVal.value = bbox.width;
55 | rect.height.baseVal.value = bbox.height;
56 | rect.setAttribute("class", className);
57 | addItems.push({ parent: parent, target: rect });
58 | }
59 | }
60 | } else if (node.matches && !node.matches("button, select, textarea")) {
61 | node.childNodes.forEach((el) => _highlight(el, addItems, text, className));
62 | }
63 | };
64 | const _highlightText = (thisNode, text, className) => {
65 | let addItems = [];
66 | _highlight(thisNode, addItems, text, className);
67 | addItems.forEach((obj) =>
68 | obj.parent.insertAdjacentElement("beforebegin", obj.target)
69 | );
70 | };
71 |
72 | /**
73 | * Small JavaScript module for the documentation.
74 | */
75 | const SphinxHighlight = {
76 |
77 | /**
78 | * highlight the search words provided in localstorage in the text
79 | */
80 | highlightSearchWords: () => {
81 | if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight
82 |
83 | // get and clear terms from localstorage
84 | const url = new URL(window.location);
85 | const highlight =
86 | localStorage.getItem("sphinx_highlight_terms")
87 | || url.searchParams.get("highlight")
88 | || "";
89 | localStorage.removeItem("sphinx_highlight_terms")
90 | url.searchParams.delete("highlight");
91 | window.history.replaceState({}, "", url);
92 |
93 | // get individual terms from highlight string
94 | const terms = highlight.toLowerCase().split(/\s+/).filter(x => x);
95 | if (terms.length === 0) return; // nothing to do
96 |
97 | // There should never be more than one element matching "div.body"
98 | const divBody = document.querySelectorAll("div.body");
99 | const body = divBody.length ? divBody[0] : document.querySelector("body");
100 | window.setTimeout(() => {
101 | terms.forEach((term) => _highlightText(body, term, "highlighted"));
102 | }, 10);
103 |
104 | const searchBox = document.getElementById("searchbox");
105 | if (searchBox === null) return;
106 | searchBox.appendChild(
107 | document
108 | .createRange()
109 | .createContextualFragment(
110 |           '<p class="highlight-link">' +
111 |           '<a href="javascript:SphinxHighlight.hideSearchWords()">' +
112 |           _("Hide Search Matches") +
113 |           "</a></p>"
114 |         )
115 | );
116 | },
117 |
118 | /**
119 | * helper function to hide the search marks again
120 | */
121 | hideSearchWords: () => {
122 | document
123 | .querySelectorAll("#searchbox .highlight-link")
124 | .forEach((el) => el.remove());
125 | document
126 | .querySelectorAll("span.highlighted")
127 | .forEach((el) => el.classList.remove("highlighted"));
128 | localStorage.removeItem("sphinx_highlight_terms")
129 | },
130 |
131 | initEscapeListener: () => {
132 | // only install a listener if it is really needed
133 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return;
134 |
135 | document.addEventListener("keydown", (event) => {
136 | // bail for input elements
137 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
138 | // bail with special keys
139 | if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return;
140 | if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) {
141 | SphinxHighlight.hideSearchWords();
142 | event.preventDefault();
143 | }
144 | });
145 | },
146 | };
147 |
148 | _ready(() => {
149 | /* Do not call highlightSearchWords() when we are on the search page.
150 | * It will highlight words from the *previous* search query.
151 | */
152 | if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords();
153 | SphinxHighlight.initEscapeListener();
154 | });
155 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | ## 🌳 coverforest - Conformal Predictions with Random Forest
6 |
7 | A simple and fast implementation of conformal random forests for both classification and regression tasks. **coverforest** extends [scikit-learn](https://scikit-learn.org)'s random forest implementation to provide prediction sets/intervals with guaranteed coverage using conformal prediction methods.
8 |
9 | **coverforest** provides three conformal prediction methods for random forests:
10 | - CV+ (Cross-Validation+) [[1](#1), [2](#2)].
11 | - Jackknife+-after-Bootstrap [[3]](#3).
12 | - Split Conformal [[4]](#4).
13 |
14 | The library provides two main classes: `CoverForestRegressor` for interval prediction and `CoverForestClassifier` for set prediction.
15 | Here are quick examples of both classes:
16 |
17 | ```python
18 | from coverforest import CoverForestRegressor
19 |
20 | reg = CoverForestRegressor(n_estimators=100, method='bootstrap') # using J+-a-Bootstrap
21 | reg.fit(X_train, y_train)
22 | y_pred, y_intervals = reg.predict(X_test, alpha=0.05) # 95% coverage intervals
23 | ```
24 |
25 | ```python
26 | from coverforest import CoverForestClassifier
27 |
28 | clf = CoverForestClassifier(n_estimators=100, method='cv') # using CV+
29 | clf.fit(X_train, y_train)
30 | y_pred, y_sets = clf.predict(X_test, alpha=0.05) # 95% coverage sets
31 | ```
32 |
33 | You can try these models in Colab: [[Classification](https://colab.research.google.com/github/donlapark/coverforest/blob/main/notebooks/classification_pipeline.ipynb)] [[Regression](https://colab.research.google.com/github/donlapark/coverforest/blob/main/notebooks/regression_pipeline.ipynb)]
34 |
35 | For additional examples and the full package API, see the [documentation](https://donlapark.github.io/coverforest).
36 |
37 | ## 🔧 Requirements
38 |
39 | - Python >=3.9
40 | - Scikit-learn >=1.6.0
41 |
42 | ## ⚡ Installation
43 |
44 | You can install **coverforest** using pip:
45 |
46 | ```bash
47 | pip install coverforest
48 | ```
49 |
50 | Or install from source:
51 |
52 | ```bash
53 | git clone https://github.com/donlapark/coverforest.git
54 | cd coverforest
55 | pip install .
56 | ```
57 |
58 | ### Regularization in conformal set predictions
59 |
60 | The classifier includes two regularization parameters $k$ and $\lambda$ that encourage smaller prediction sets [[5]](#5).
61 |
62 | ```python
63 | clf = CoverForestClassifier(n_estimators=100, method='cv', k_init=2, lambda_init=0.1)
64 | ```
65 |
66 | You can also search for suitable $k$ and $\lambda$ automatically by specifying `k_init="auto"` and `lambda_init="auto"`, which are the default values of `CoverForestClassifier`.
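
A minimal sketch of the automatic search (note: passing `alpha` to `fit()` and reading the fitted `k_star_`/`lambda_star_` attributes follows the usage in this repository's experiment scripts):

```python
clf = CoverForestClassifier(n_estimators=100, method='cv')  # k_init='auto', lambda_init='auto' by default
clf.fit(X_train, y_train, alpha=0.05)  # the parameter search runs during fitting
print(clf.k_star_, clf.lambda_star_)   # the k and lambda selected by the search
```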
67 |
68 | ### Performance Tips
69 |
70 | Random forests lend themselves to parallel computation, since individual trees can be fitted and queried concurrently. Use the `n_jobs` parameter in `fit()` and `predict()` to control CPU usage (`n_jobs=-1` uses all cores).
71 |
72 | For prediction, the conformity score calculations allocate an array of size `(n_train × n_test × n_classes)`. To keep memory usage manageable, especially with high `n_jobs` values, split large test sets into smaller batches, as in the sketch below.
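
A minimal batching sketch (the batch size is illustrative; tune it to your memory budget):

```python
batch = 1000  # illustrative batch size
all_preds, all_sets = [], []
for i in range(0, len(X_test), batch):
    preds, sets_ = clf.predict(X_test[i:i + batch], alpha=0.05, n_jobs=-1)
    all_preds.extend(preds)
    all_sets.extend(sets_)
```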
73 |
74 | See the documentation for more details and examples.
75 |
76 | ## 🔗 See Also
77 |
78 | - [MAPIE](https://github.com/scikit-learn-contrib/MAPIE): A Python package that provides scikit-learn-compatible wrappers for conformal classification and regression.
79 | - [conforest](https://github.com/knrumsey/conforest): An R implementation of random forests with inductive conformal prediction.
80 | - [clover](https://github.com/Monoxido45/clover): A Python implementation of a regression forest method with a conditional coverage ($`P(Y \vert X = x)`$) guarantee.
81 | - [Conformal Prediction](https://github.com/aangelopoulos/conformal-prediction): Jupyter notebook demonstrations of conformal prediction on various tasks, such as image classification, image segmentation, time series forecasting, and outlier detection.
82 | - [TorchCP](https://github.com/ml-stat-Sustech/TorchCP): A Python toolbox for conformal prediction in deep learning, built on top of PyTorch.
83 | - [crepes](https://github.com/henrikbostrom/crepes): A Python package that implements standard and Mondrian conformal classifiers, as well as standard, normalized, and Mondrian conformal regressors and predictive systems.
84 | - [nonconformist](https://github.com/donlnz/nonconformist): One of the first Python implementations of conformal prediction.
85 |
86 |
87 | ## 📖 References
88 |
89 | [1] Yaniv Romano, Matteo Sesia & Emmanuel J. Candès, "Classification with Valid and Adaptive Coverage", NeurIPS 2020.
90 |
91 | [2] Rina Foygel Barber, Emmanuel J. Candès, Aaditya Ramdas & Ryan J. Tibshirani, "Predictive inference with the jackknife+", Ann. Statist. 49 (1) 486-507, 2021.
92 |
93 | [3] Byol Kim, Chen Xu, Rina Foygel Barber, "Predictive inference is free with the jackknife+-after-bootstrap", NeurIPS 2020.
94 |
95 | [4] Vladimir Vovk, Ilia Nouretdinov, Valery Manokhin & Alexander Gammerman, "Cross-conformal predictive distributions", 37-51, COPA 2018.
96 |
97 | [5] Anastasios Nikolas Angelopoulos, Stephen Bates, Michael I. Jordan & Jitendra Malik, "Uncertainty Sets for Image Classifiers using Conformal Prediction", ICLR 2021.
98 |
99 | [6] Leo Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001.
100 |
101 | ## 📜 License
102 |
103 | [BSD-3-Clause license](https://github.com/donlapark/coverforest/blob/main/LICENSE)
104 |
105 | ## 📝 Citation
106 |
107 | If you use **coverforest** in your research, please cite:
108 |
109 | ```bibtex
110 | @misc{coverforest2025,
111 | Author = {Panisara Meehinkong and Donlapark Ponnoprat},
112 | Title = {coverforest: Conformal Predictions with Random Forest in Python},
113 | Year = {2025},
114 | Eprint = {arXiv:2501.14570},
115 | }
116 | ```
117 |
--------------------------------------------------------------------------------
/experiments/experiment_classification_1.py:
--------------------------------------------------------------------------------
1 | """Benchmarking CoverForestClassifier's coverage and average set size"""
2 |
3 | from coverforest import CoverForestClassifier
4 | from sklearn.model_selection import train_test_split
5 | import pandas as pd
6 | import numpy as np
7 | import csv
8 | import os
9 | from scipy.stats import mode
10 | import time
11 | from ucimlrepo import fetch_ucirepo
12 | from sklearn.preprocessing import StandardScaler
13 |
14 | import tensorflow as tf
15 |
16 | result_folder = "results_classification"
17 |
18 | if not os.path.exists(result_folder):
19 | os.makedirs(result_folder)
20 |
21 | rng1 = np.random.default_rng(1)
22 | randints1 = rng1.integers(0, high=1e6, size=15)
23 | rng2 = np.random.default_rng(2)
24 | randints2 = rng2.integers(0, high=1e6, size=50)
25 |
26 |
27 | ################################
28 | # Training & Evaluation workflow
29 | ################################
30 |
31 |
32 | def experiment(data_name, X_train, y_train, X_test, y_test, method_list, params):
33 | for method, k_, lambda_ in method_list:
34 | for alpha in [0.2, 0.1, 0.05]:
35 | if lambda_ == "auto":
36 | k_list = []
37 | lambda_list = []
38 | # Searching k_ and lambda_
39 | for k in range(15):
40 | rfclf = CoverForestClassifier(
41 | n_estimators=params[method]["n_estimators"],
42 | method=method,
43 | allow_empty_sets=True,
44 | cv=10,
45 | k_init="auto",
46 | lambda_init="auto",
47 | n_jobs=-1,
48 | random_state=randints1[k],
49 | )
50 | rfclf.fit(X_train, y_train, alpha=alpha)
51 |                     k_list.append(rfclf.k_star_)
52 |                     lambda_list.append(rfclf.lambda_star_)
53 |                     print(f"Multiround parameter searching: Round {k + 1} of 15.")
54 |                 k_best = mode(k_list).mode
55 |                 lambda_best = mode(lambda_list).mode
56 | else:
57 | k_best = k_
58 | lambda_best = lambda_
59 | n_preds = []
60 | coverage_probs = []
61 | echo = []
62 | train_times = []
63 | test_times = []
64 | for k in range(50):
65 | rfclf = CoverForestClassifier(
66 | n_estimators=params[method]["n_estimators"],
67 | method=method,
68 | allow_empty_sets=True,
69 | cv=10,
70 | k_init=k_best,
71 | lambda_init=lambda_best,
72 | n_jobs=-1,
73 | n_forests_per_fold=1,
74 | random_state=randints2[k],
75 | )
76 | curr1 = time.time()
77 | rfclf.fit(X_train, y_train, alpha=alpha)
78 | curr2 = time.time()
79 | curr3 = time.time()
80 | _, y_pred = rfclf.predict(X_test, alpha=alpha, binary_output=False)
81 | curr4 = time.time()
82 | train_times.append(curr2 - curr1)
83 | test_times.append(curr4 - curr3)
84 | print("Training + test times:", curr2 - curr1, curr4 - curr3)
85 | avg_size = np.mean([y_pred[i].shape[0] for i in range(len(y_pred))])
86 | cvg_prob = np.mean([y_test[i] in y_pred[i] for i in range(len(y_pred))])
87 | n_preds.append(avg_size)
88 | coverage_probs.append(cvg_prob)
89 | echo.append(k + 1)
90 | print("echo", k, ", alpha =", alpha)
91 | print(f"average size = {avg_size}, coverage = {cvg_prob}.")
92 |
93 | method_ = method.replace("_", "")
94 | filename = f"{data_name}_{method_}_{k_}_{alpha}.csv"
95 |             with open(os.path.join(result_folder, filename), "w", newline="") as f:
96 |                 write = csv.writer(f)
97 | write.writerows(
98 | [echo, n_preds, coverage_probs, train_times, test_times]
99 | )
100 |
101 |
102 | # Set up methods and parameters.
103 | method_list = [
104 | ("bootstrap", "auto", "auto"),
105 | ("bootstrap", 0, 0),
106 | ("cv", "auto", "auto"),
107 | ("cv", 0, 0),
108 | ("split", "auto", "auto"),
109 | ("split", 0, 0),
110 | ]
111 |
112 | param_dict = {
113 | "bootstrap": {"n_estimators": int(1000 * 0.9)},
114 | "cv": {"n_estimators": 100},
115 | "split": {"n_estimators": int(1000 * 0.9)},
116 | }
117 |
118 |
119 | ##############
120 | # Mice dataset
121 | ##############
122 |
123 | MiceData = pd.read_csv("data/MiceClean.csv")
124 |
125 | X = MiceData.drop(columns=["class", "MouseID"])
126 | y = MiceData["class"]
127 | # Convert the label Series to a flat NumPy array.
128 | y = y.to_numpy()
129 |
130 | seed = 1
131 | X_train, X_test, y_train, y_test = train_test_split(
132 | X, y, test_size=0.20, stratify=y, random_state=seed
133 | )
134 |
135 | experiment("MiceData", X_train, y_train, X_test, y_test, method_list, param_dict)
136 |
137 |
138 | #####################
139 | # WineQuality dataset
140 | #####################
141 |
142 | wine_quality = pd.read_csv("data/winequality-white.csv", sep=";")
143 |
144 | seed = 123
145 |
146 | sample_wine = wine_quality.sample(n=2000, random_state=seed)
147 |
148 | X = sample_wine.drop(columns=["quality"])
149 | y = sample_wine["quality"]
150 | y = y.to_numpy()
151 |
152 | X_train, X_test, y_train, y_test = train_test_split(
153 | X, y, test_size=0.20, stratify=y, random_state=seed
154 | )
155 |
156 | # Normalize data to prevent numerical overflows.
157 | scaler = StandardScaler()
158 | X_train = scaler.fit_transform(X_train)
159 | X_test = scaler.transform(X_test)
160 |
161 | experiment("WineData", X_train, y_train, X_test, y_test, method_list, param_dict)
162 |
163 |
164 | ####################
165 | # Myocardial dataset
166 | ####################
167 |
168 | myocardial = fetch_ucirepo(id=579)
169 |
170 | X = myocardial.data.features
171 | y = myocardial.data.targets
172 |
173 | y = y["LET_IS"]
174 | y = y.to_numpy()
175 |
176 | seed = 1
177 |
178 | X_train, X_test, y_train, y_test = train_test_split(
179 | X, y, test_size=0.20, stratify=y, random_state=seed
180 | )
181 |
182 | experiment("MyocData", X_train, y_train, X_test, y_test, method_list, param_dict)
183 |
184 |
185 | ###############
186 | # MNIST dataset
187 | ###############
188 |
189 | (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
190 |
191 | X_train = X_train.reshape((X_train.shape[0], -1))
192 | X_test = X_test.reshape((X_test.shape[0], -1))
193 |
194 | X_train = X_train.astype("float32")
195 | X_test = X_test.astype("float32")
196 |
197 | X_train = X_train / 255  # scale pixel values from [0, 255] to [0, 1]
198 | X_test = X_test / 255
199 |
200 | X_train = X_train[0:1000]
201 | y_train = y_train[0:1000]
202 |
203 | X_test = X_test[0:250]
204 | y_test = y_test[0:250]
205 |
206 | experiment("MNISTData", X_train, y_train, X_test, y_test, method_list, param_dict)
207 |
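208 | # Optional smoke test (illustrative sketch, commented out): before launching
209 | # the full runs above, the same workflow can be exercised on a small
210 | # synthetic problem. `make_classification` is scikit-learn's synthetic-data
211 | # helper; the sizes and settings here are arbitrary.
212 | #
213 | # from sklearn.datasets import make_classification
214 | # X, y = make_classification(n_samples=300, n_classes=3, n_informative=6, random_state=0)
215 | # X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, stratify=y, random_state=0)
216 | # experiment("Synthetic", X_tr, y_tr, X_te, y_te, [("cv", 0, 0)], param_dict)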
--------------------------------------------------------------------------------
/coverforest/metrics.py:
--------------------------------------------------------------------------------
1 | # coverforest authors: Donlapark Ponnoprat
2 | # Panisara Meehinkong
3 | # License: BSD 3 clause
4 |
5 | import numpy as np
6 | from sklearn.utils._array_api import _average
7 | from sklearn.utils._param_validation import (
8 |     validate_params,
9 | )
10 | from sklearn.utils.validation import (
11 |     check_consistent_length,
12 |     column_or_1d,
13 | )
14 |
15 |
16 | @validate_params(
17 | {
18 | "y_true": ["array-like", "sparse matrix"],
19 |         "y_pred": [tuple, list, "array-like", "sparse matrix"],
20 |         "labels": ["array-like", None],
21 |         "sample_weight": ["array-like", None],
22 |     },
23 |     # Parameters are fully validated here, so nested sklearn calls skip re-validation.
24 |     prefer_skip_nested_validation=True,
25 | )
26 | def classification_coverage_score(y_true, y_pred, *, labels=None, sample_weight=None):
27 | """Compute the empirical coverage for classification prediction sets.
28 |
29 | The coverage score measures the proportion of true labels that are included
30 | in the prediction sets.
31 |
32 | Parameters
33 | ----------
34 | y_true : array-like of shape (n_samples,)
35 | Ground truth (correct) labels.
36 |
37 |     y_pred : tuple, list or array-like of shape (n_samples, n_classes)
38 |         Prediction sets, given either as a list whose i-th element holds the
39 |         labels in the i-th prediction set, or as a binary matrix where 1
40 |         indicates the class is included in the prediction set and 0 that it is not.
41 |
42 | labels : array-like of shape (n_classes,), default=None
43 |         List of labels in the same order as the columns of y_pred. Required when y_pred is a binary matrix.
44 |
45 | sample_weight : array-like of shape (n_samples,), default=None
46 | Sample weights. If None, then samples are equally weighted.
47 |
48 | Returns
49 | -------
50 | score : float
51 | Returns the empirical coverage, i.e., the proportion of true labels
52 | included in the prediction sets, weighted by sample_weight.
53 | Best value is 1 and worst value is 0.
54 |
55 | Examples
56 | --------
57 | >>> import numpy as np
58 |     >>> from coverforest.metrics import classification_coverage_score
59 | >>> y_true = [0, 1, 2]
60 | >>> y_pred = np.array([[1, 0, 1], [0, 0, 1], [0, 0, 1]])
61 | >>> labels = [0, 1, 2]
62 | >>> classification_coverage_score(y_true, y_pred, labels=labels)
63 | 0.66...
64 | """
65 |
66 | if isinstance(y_pred, tuple):
67 | y_pred = y_pred[1]
68 |
69 | y_true = column_or_1d(y_true)
70 | n = len(y_pred)
71 |     # Validate matching lengths with a proper check (asserts are skipped under -O).
72 |     check_consistent_length(y_true, y_pred, sample_weight)
73 |
74 | if isinstance(y_pred, list):
75 | is_in_y_pred = [y_true[i] in y_pred[i] for i in range(n)]
76 | else:
77 | if labels is None:
78 | raise ValueError("`labels` must be specified when `y_pred` is an array.")
79 | class_to_idx = {c: i for i, c in enumerate(labels)}
80 | y_idx = np.vectorize(class_to_idx.__getitem__)(y_true)
81 | is_in_y_pred = y_pred[np.arange(len(y_pred)), y_idx]
82 |
83 | return float(_average(is_in_y_pred, weights=sample_weight))
84 |
85 |
86 | @validate_params(
87 | {
88 |         "y_true": ["array-like", "sparse matrix"],
89 |         # y_true is unused by this loss; it is kept for a metric-style signature.
90 |         "y_pred": [tuple, list, "array-like", "sparse matrix"],
91 |     },
92 | prefer_skip_nested_validation=True,
93 | )
94 | def average_set_size_loss(y_true, y_pred):
95 | """Compute the average size of classification prediction sets.
96 |
97 | For each sample, the set size is the number of classes included in
98 | the prediction set (sum of binary indicators).
99 |
100 | Parameters
101 | ----------
102 | y_true : array-like of shape (n_samples,)
103 |         Ground truth (correct) labels. Unused by this loss; accepted for a consistent metric signature.
104 |
105 | y_pred : tuple, list or array-like of shape (n_samples, n_classes)
106 | Binary matrix indicating the predicted set for each sample, where 1
107 | indicates the class is included in the prediction set and 0 indicates
108 | it is not.
109 |
110 | Returns
111 | -------
112 | score : float
113 | Returns the average prediction set size.
114 | Minimum possible value is 0, maximum is n_classes.
115 |
116 | Examples
117 | --------
118 | >>> import numpy as np
119 | >>> from metrics import average_set_size_loss
120 | >>> y_pred = np.array([[1, 0, 0], [1, 1, 0], [0, 0, 1]])
121 | >>> average_set_size_loss(y_pred)
122 | 1.333...
123 | """
124 |
125 | if isinstance(y_pred, tuple):
126 | y_pred = y_pred[1]
127 |
128 | if isinstance(y_pred, list):
129 | y_sizes = [len(y_pred) for y_pred in y_pred]
130 | else:
131 | y_sizes = y_pred.sum(axis=1)
132 |
133 | return float(_average(y_sizes))
134 |
135 |
136 | @validate_params(
137 | {
138 | "y_true": ["array-like", "sparse matrix"],
139 |         "y_pred": [tuple, list, "array-like", "sparse matrix"],
140 | "sample_weight": ["array-like", None],
141 | },
142 | prefer_skip_nested_validation=True,
143 | )
144 | def regression_coverage_score(y_true, y_pred, *, sample_weight=None):
145 | """Compute the empirical coverage for regression prediction intervals.
146 |
147 | The coverage score measures the proportion of true values that fall
148 | within the predicted intervals.
149 |
150 | Parameters
151 | ----------
152 | y_true : array-like of shape (n_samples,)
153 | Ground truth (correct) target values.
154 |
155 | y_pred : tuple, list or array-like of shape (n_samples, 2)
156 | Predicted intervals, where each row contains [lower_bound, upper_bound].
157 |
158 | sample_weight : array-like of shape (n_samples,), default=None
159 | Sample weights. If None, then samples are equally weighted.
160 |
161 | Returns
162 | -------
163 | score : float
164 | Returns the empirical coverage, i.e., the proportion of true values
165 | falling within the prediction intervals, weighted by sample_weight.
166 | Best value is 1 and worst value is 0.
167 |
168 | Examples
169 | --------
170 | >>> import numpy as np
171 |     >>> from coverforest.metrics import regression_coverage_score
172 | >>> y_true = [1.0, -2.0, 3.0]
173 | >>> y_pred = np.array([[0.5, 1.5], [1.5, 2.5], [2.5, 3.5]])
174 | >>> regression_coverage_score(y_true, y_pred)
175 | 0.66...
176 | """
177 |
178 |     if isinstance(y_pred, tuple):
179 |         y_pred = y_pred[1]
180 |     y_pred = np.asarray(y_pred)  # accept plain lists of [low, high] pairs
181 |     y_true = column_or_1d(y_true)
182 |     check_consistent_length(y_true, y_pred, sample_weight)
183 |
184 | low = y_pred[:, 0]
185 | high = y_pred[:, 1]
186 | return float(_average((low <= y_true) & (y_true <= high), weights=sample_weight))
187 |
188 |
189 | @validate_params(
190 | {
191 |         "y_true": ["array-like", "sparse matrix"],
192 |         # y_true is unused by this loss; it is kept for a metric-style signature.
193 |         "y_pred": [tuple, list, "array-like", "sparse matrix"],
194 |     },
195 | prefer_skip_nested_validation=True,
196 | )
197 | def average_interval_length_loss(y_true, y_pred):
198 | """Compute the average length of regression prediction intervals.
199 |
200 | For each sample, the interval length is the difference between
201 | the upper and lower bounds.
202 |
203 | Parameters
204 | ----------
205 | y_true : array-like of shape (n_samples,)
206 |         Ground truth (correct) labels. Unused by this loss; accepted for a consistent metric signature.
207 |
208 | y_pred : tuple, list or array-like of shape (n_samples, 2)
209 | Predicted intervals, where each row contains [lower_bound, upper_bound].
210 |
211 | Returns
212 | -------
213 | score : float
214 | Returns the average interval length.
215 | Minimum possible value is 0, no maximum value.
216 |
217 | Examples
218 | --------
219 |     >>> import numpy as np
220 |     >>> from coverforest.metrics import average_interval_length_loss
221 |     >>> y_true = [1.0, 3.0, 3.0]; y_pred = np.array([[0.5, 2.5], [1.5, 4.5], [2.5, 3.5]])
222 |     >>> average_interval_length_loss(y_true, y_pred)
223 |     2.0
224 | """
225 |
226 |     if isinstance(y_pred, tuple):
227 |         y_pred = y_pred[1]
228 |     y_pred = np.asarray(y_pred)  # accept plain lists of [low, high] pairs
229 |     low = y_pred[:, 0]
230 |     high = y_pred[:, 1]
231 | return float(_average(high - low))
232 |
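233 |
234 | if __name__ == "__main__":
235 |     # Minimal sanity check (illustrative only, not part of the public API):
236 |     # run the four metrics on the tiny examples from the docstrings above.
237 |     y_true_cls = [0, 1, 2]
238 |     y_sets = np.array([[1, 0, 1], [0, 0, 1], [0, 0, 1]])
239 |     print(classification_coverage_score(y_true_cls, y_sets, labels=[0, 1, 2]))  # ~0.667
240 |     print(average_set_size_loss(y_true_cls, y_sets))  # ~1.333
241 |     y_true_reg = [1.0, -2.0, 3.0]
242 |     y_intervals = np.array([[0.5, 1.5], [1.5, 2.5], [2.5, 3.5]])
243 |     print(regression_coverage_score(y_true_reg, y_intervals))  # ~0.667
244 |     print(average_interval_length_loss(y_true_reg, y_intervals))  # 1.0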
--------------------------------------------------------------------------------