├── .gitattributes
├── .github
    ├── CODEOWNERS
    ├── FUNDING.yml
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   └── performance_issue.md
    ├── actions
    │   └── install-env
    │   │   └── action.yml
    ├── pull_request_template.md
    └── workflows
    │   ├── code-quality.yml
    │   ├── delete-caches.yml
    │   ├── dev-docs.yml
    │   ├── pypi.yml
    │   ├── release-docs.yml
    │   └── unit-tests.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CITATION.bib
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Cargo.toml
├── LICENSE
├── Makefile
├── README.md
├── benchmarks
    ├── Batch versus online.ipynb
    ├── Factorization machines.ipynb
    ├── README.md
    ├── binary_classification.csv
    ├── config.py
    ├── details.json
    ├── model_adapters
    │   ├── __init__.py
    │   └── vw.py
    ├── multiclass_classification.csv
    ├── regression.csv
    ├── render.py
    └── run.py
├── build.py
├── docs
    ├── .pages
    ├── CNAME
    ├── benchmarks
    │   ├── .pages
    │   ├── Binary classification
    │   │   ├── binary_classification.csv
    │   │   └── index.md
    │   ├── Multiclass classification
    │   │   ├── index.md
    │   │   └── multiclass_classification.csv
    │   └── Regression
    │   │   ├── index.md
    │   │   └── regression.csv
    ├── css
    │   └── version-select.css
    ├── examples
    │   ├── .pages
    │   ├── batch-to-online.ipynb
    │   ├── bike-sharing-forecasting.ipynb
    │   ├── building-a-simple-nowcasting-model.ipynb
    │   ├── content-personalization.ipynb
    │   ├── debugging-a-pipeline.ipynb
    │   ├── imbalanced-learning.ipynb
    │   ├── matrix-factorization-for-recommender-systems
    │   │   ├── .pages
    │   │   ├── part-1.ipynb
    │   │   ├── part-2.ipynb
    │   │   └── part-3.ipynb
    │   ├── quantile-regression-uncertainty.ipynb
    │   ├── sentence-classification.ipynb
    │   ├── sentence_classification_files
    │   │   ├── sentence_classification_14_0.svg
    │   │   ├── sentence_classification_19_0.svg
    │   │   └── sentence_classification_31_0.svg
    │   └── the-art-of-using-pipelines.ipynb
    ├── faq
    │   ├── .pages
    │   └── index.md
    ├── img
    │   ├── dtree_draw.svg
    │   ├── favicon.ico
    │   ├── favicon_old.ico
    │   ├── histogram_docstring.svg
    │   ├── icon.png
    │   ├── illustration.png
    │   ├── illustration_old.png
    │   ├── logo.svg
    │   ├── logo_white.svg
    │   ├── online_active_learning.png
    │   ├── pipeline_docstring.svg
    │   └── skyline_docstring.svg
    ├── index.md
    ├── introduction
    │   ├── .pages
    │   ├── basic-concepts.md
    │   ├── getting-started
    │   │   ├── binary-classification.ipynb
    │   │   ├── concept-drift-detection.ipynb
    │   │   ├── concept-drift-detection_files
    │   │   │   ├── concept-drift-detection_1_0.png
    │   │   │   └── concept-drift-detection_3_1.png
    │   │   ├── multiclass-classification.ipynb
    │   │   └── regression.ipynb
    │   ├── installation.md
    │   ├── next-steps.md
    │   ├── related-projects.md
    │   └── why-use-river.md
    ├── javascripts
    │   ├── config.js
    │   └── tablesort.js
    ├── js
    │   └── version-select.js
    ├── license
    │   ├── .pages
    │   └── license.md
    ├── overrides
    │   ├── home.html
    │   └── partials
    │   │   ├── footer.html
    │   │   └── integrations
    │   │       └── analytics.html
    ├── parse
    │   └── __main__.py
    ├── recipes
    │   ├── .pages
    │   ├── active-learning.ipynb
    │   ├── bandits-101.ipynb
    │   ├── cloning-and-mutating.ipynb
    │   ├── feature-extraction.ipynb
    │   ├── hyperparameter-tuning.ipynb
    │   ├── mini-batching.ipynb
    │   ├── model-evaluation.ipynb
    │   ├── on-hoeffding-trees.ipynb
    │   ├── on-hoeffding-trees_files
    │   │   ├── on-hoeffding-trees_12_0.svg
    │   │   ├── on-hoeffding-trees_21_0.png
    │   │   ├── on-hoeffding-trees_23_0.png
    │   │   ├── on-hoeffding-trees_25_0.png
    │   │   ├── on-hoeffding-trees_27_0.png
    │   │   └── on-hoeffding-trees_29_0.png
    │   ├── pipelines.ipynb
    │   ├── pipelines_files
    │   │   ├── pipelines_18_0.svg
    │   │   └── pipelines_8_0.svg
    │   ├── reading-data.ipynb
    │   └── rolling-computations.ipynb
    ├── releases
    │   ├── .pages
    │   ├── 0.0.2.md
    │   ├── 0.0.3.md
    │   ├── 0.1.0.md
    │   ├── 0.10.0.md
    │   ├── 0.10.1.md
    │   ├── 0.11.0.md
    │   ├── 0.11.1.md
    │   ├── 0.12.0.md
    │   ├── 0.12.1.md
    │   ├── 0.13.0.md
    │   ├── 0.14.0.md
    │   ├── 0.15.0.md
    │   ├── 0.16.0.md
    │   ├── 0.17.0.md
    │   ├── 0.18.0.md
    │   ├── 0.19.0.md
    │   ├── 0.2.0.md
    │   ├── 0.20.0.md
    │   ├── 0.20.1.md
    │   ├── 0.21.0.md
    │   ├── 0.21.1.md
    │   ├── 0.21.2.md
    │   ├── 0.22.0.md
    │   ├── 0.3.0.md
    │   ├── 0.4.1.md
    │   ├── 0.4.3.md
    │   ├── 0.4.4.md
    │   ├── 0.5.0.md
    │   ├── 0.5.1.md
    │   ├── 0.6.0.md
    │   ├── 0.6.1.md
    │   ├── 0.7.0.md
    │   ├── 0.7.1.md
    │   ├── 0.8.0.md
    │   ├── 0.9.0.md
    │   └── unreleased.md
    └── stylesheets
    │   └── extra.css
├── mkdocs.yml
├── poetry.lock
├── pyproject.toml
├── river
    ├── __init__.py
    ├── __version__.py
    ├── active
    │   ├── __init__.py
    │   ├── base.py
    │   └── entropy.py
    ├── anomaly
    │   ├── __init__.py
    │   ├── base.py
    │   ├── filter.py
    │   ├── gaussian.py
    │   ├── hst.py
    │   ├── lof.py
    │   ├── pad.py
    │   ├── sad.py
    │   ├── svm.py
    │   ├── test_hst.py
    │   ├── test_lof.py
    │   └── test_svm.py
    ├── api.py
    ├── bandit
    │   ├── __init__.py
    │   ├── base.py
    │   ├── bayes_ucb.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   └── news.py
    │   ├── envs
    │   │   ├── __init__.py
    │   │   ├── candy_cane.py
    │   │   └── testbed.py
    │   ├── epsilon_greedy.py
    │   ├── evaluate.py
    │   ├── exp3.py
    │   ├── lin_ucb.py
    │   ├── random.py
    │   ├── test_envs.py
    │   ├── test_policies.py
    │   ├── thompson.py
    │   └── ucb.py
    ├── base
    │   ├── __init__.py
    │   ├── base.py
    │   ├── classifier.py
    │   ├── clusterer.py
    │   ├── drift_detector.py
    │   ├── ensemble.py
    │   ├── estimator.py
    │   ├── multi_output.py
    │   ├── regressor.py
    │   ├── tags.py
    │   ├── test_base.py
    │   ├── transformer.py
    │   ├── typing.py
    │   ├── viz.py
    │   └── wrapper.py
    ├── checks
    │   ├── __init__.py
    │   ├── anomaly.py
    │   ├── clf.py
    │   ├── common.py
    │   ├── model_selection.py
    │   ├── reco.py
    │   └── utils.py
    ├── cluster
    │   ├── __init__.py
    │   ├── clustream.py
    │   ├── dbstream.py
    │   ├── denstream.py
    │   ├── k_means.py
    │   ├── odac.py
    │   ├── streamkmeans.py
    │   ├── test_dbstream.py
    │   └── textclust.py
    ├── compat
    │   ├── __init__.py
    │   ├── river_to_sklearn.py
    │   ├── sklearn_to_river.py
    │   └── test_sklearn.py
    ├── compose
    │   ├── __init__.py
    │   ├── func.py
    │   ├── grouper.py
    │   ├── pipeline.py
    │   ├── product.py
    │   ├── renamer.py
    │   ├── select.py
    │   ├── target_transform.py
    │   ├── test_.py
    │   ├── test_product.py
    │   └── union.py
    ├── conf
    │   ├── __init__.py
    │   ├── interval.py
    │   └── jackknife.py
    ├── conftest.py
    ├── covariance
    │   ├── __init__.py
    │   ├── emp.py
    │   └── test_emp.py
    ├── datasets
    │   ├── __init__.py
    │   ├── airline-passengers.csv
    │   ├── airline_passengers.py
    │   ├── banana.zip
    │   ├── bananas.py
    │   ├── base.py
    │   ├── bikes.py
    │   ├── chick-weights.csv
    │   ├── chick_weights.py
    │   ├── credit_card.py
    │   ├── elec2.py
    │   ├── higgs.py
    │   ├── http.py
    │   ├── insects.py
    │   ├── keystroke.py
    │   ├── malicious_url.py
    │   ├── movielens100k.py
    │   ├── music.py
    │   ├── phishing.csv.gz
    │   ├── phishing.py
    │   ├── restaurants.py
    │   ├── segment.csv.zip
    │   ├── segment.py
    │   ├── sms_spam.py
    │   ├── smtp.py
    │   ├── solar-flare.csv.zip
    │   ├── solar_flare.py
    │   ├── synth
    │   │   ├── __init__.py
    │   │   ├── agrawal.py
    │   │   ├── anomaly_sine.py
    │   │   ├── concept_drift_stream.py
    │   │   ├── friedman.py
    │   │   ├── hyper_plane.py
    │   │   ├── led.py
    │   │   ├── logical.py
    │   │   ├── mixed.py
    │   │   ├── mv.py
    │   │   ├── planes_2d.py
    │   │   ├── random_rbf.py
    │   │   ├── random_tree.py
    │   │   ├── sea.py
    │   │   ├── sine.py
    │   │   ├── stagger.py
    │   │   └── waveform.py
    │   ├── taxis.py
    │   ├── test_datasets.py
    │   ├── trec07.py
    │   ├── trump_approval.csv.gz
    │   ├── trump_approval.py
    │   ├── water-flow.csv
    │   ├── water_flow.py
    │   └── web_traffic.py
    ├── drift
    │   ├── __init__.py
    │   ├── adwin.py
    │   ├── adwin_c.pyi
    │   ├── adwin_c.pyx
    │   ├── binary
    │   │   ├── __init__.py
    │   │   ├── ddm.py
    │   │   ├── eddm.py
    │   │   ├── fhddm.py
    │   │   ├── hddm_a.py
    │   │   └── hddm_w.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   ├── airline_passengers.csv
    │   │   ├── airline_passengers.py
    │   │   ├── apple.csv
    │   │   ├── apple.py
    │   │   ├── base.py
    │   │   ├── bitcoin.csv
    │   │   ├── bitcoin.py
    │   │   ├── brent_crude_oil.csv
    │   │   ├── brent_crude_oil.py
    │   │   ├── occupancy.csv
    │   │   ├── occupancy.py
    │   │   ├── run_log.csv
    │   │   ├── run_log.py
    │   │   ├── uk_coal_employment.csv
    │   │   └── uk_coal_employment.py
    │   ├── dummy.py
    │   ├── kswin.py
    │   ├── no_drift.py
    │   ├── page_hinkley.py
    │   ├── retrain.py
    │   └── test_drift_detectors.py
    ├── dummy.py
    ├── ensemble
    │   ├── __init__.py
    │   ├── bagging.py
    │   ├── boosting.py
    │   ├── ewa.py
    │   ├── stacking.py
    │   ├── streaming_random_patches.py
    │   └── voting.py
    ├── evaluate
    │   ├── __init__.py
    │   ├── gen.py
    │   ├── progressive_validation.py
    │   └── tracks.py
    ├── facto
    │   ├── __init__.py
    │   ├── base.py
    │   ├── ffm.py
    │   ├── fm.py
    │   ├── fwfm.py
    │   └── hofm.py
    ├── feature_extraction
    │   ├── __init__.py
    │   ├── agg.py
    │   ├── kernel_approx.py
    │   ├── poly.py
    │   ├── test_agg.py
    │   ├── test_vectorize.py
    │   └── vectorize.py
    ├── feature_selection
    │   ├── __init__.py
    │   ├── k_best.py
    │   ├── random.py
    │   └── variance.py
    ├── forest
    │   ├── __init__.py
    │   ├── adaptive_random_forest.py
    │   ├── aggregated_mondrian_forest.py
    │   ├── online_extra_trees.py
    │   └── test_amf.py
    ├── imblearn
    │   ├── __init__.py
    │   ├── chebyshev.py
    │   ├── hard_sampling.py
    │   └── random.py
    ├── linear_model
    │   ├── __init__.py
    │   ├── alma.py
    │   ├── base.py
    │   ├── bayesian_lin_reg.py
    │   ├── lin_reg.py
    │   ├── log_reg.py
    │   ├── pa.py
    │   ├── perceptron.py
    │   ├── softmax.py
    │   └── test_glm.py
    ├── metrics
    │   ├── __init__.py
    │   ├── accuracy.py
    │   ├── balanced_accuracy.py
    │   ├── base.py
    │   ├── confusion.py
    │   ├── cross_entropy.py
    │   ├── efficient_rollingrocauc
    │   │   ├── __init__.py
    │   │   ├── cpp
    │   │   │   ├── RollingROCAUC.cpp
    │   │   │   └── RollingROCAUC.hpp
    │   │   ├── efficient_rollingrocauc.pxd
    │   │   ├── efficient_rollingrocauc.pyi
    │   │   └── efficient_rollingrocauc.pyx
    │   ├── expected_mutual_info.pyi
    │   ├── expected_mutual_info.pyx
    │   ├── fbeta.py
    │   ├── fowlkes_mallows.py
    │   ├── geometric_mean.py
    │   ├── jaccard.py
    │   ├── kappa.py
    │   ├── log_loss.py
    │   ├── mae.py
    │   ├── mape.py
    │   ├── mcc.py
    │   ├── mse.py
    │   ├── multioutput
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── confusion.py
    │   │   ├── exact_match.py
    │   │   ├── macro.py
    │   │   ├── micro.py
    │   │   ├── per_output.py
    │   │   ├── sample_average.py
    │   │   └── test_multioutput_metrics.py
    │   ├── mutual_info.py
    │   ├── precision.py
    │   ├── r2.py
    │   ├── rand.py
    │   ├── recall.py
    │   ├── report.py
    │   ├── roc_auc.py
    │   ├── rolling_roc_auc.py
    │   ├── silhouette.py
    │   ├── smape.py
    │   ├── test_confusion.py
    │   ├── test_cross_entropy.py
    │   ├── test_fbeta.py
    │   ├── test_log_loss.py
    │   ├── test_metrics.py
    │   ├── test_r2.py
    │   └── vbeta.py
    ├── misc
    │   ├── __init__.py
    │   ├── sdft.py
    │   └── skyline.py
    ├── model_selection
    │   ├── __init__.py
    │   ├── bandit.py
    │   ├── base.py
    │   ├── greedy.py
    │   ├── sh.py
    │   └── test_bandit.py
    ├── multiclass
    │   ├── __init__.py
    │   ├── occ.py
    │   ├── ovo.py
    │   ├── ovr.py
    │   └── test_ovr.py
    ├── multioutput
    │   ├── __init__.py
    │   ├── chain.py
    │   └── encoder.py
    ├── naive_bayes
    │   ├── __init__.py
    │   ├── base.py
    │   ├── bernoulli.py
    │   ├── complement.py
    │   ├── gaussian.py
    │   ├── multinomial.py
    │   └── test_naive_bayes.py
    ├── neighbors
    │   ├── __init__.py
    │   ├── ann
    │   │   ├── __init__.py
    │   │   ├── nn_vertex.py
    │   │   └── swinn.py
    │   ├── base.py
    │   ├── knn_classifier.py
    │   ├── knn_regressor.py
    │   └── lazy.py
    ├── neural_net
    │   ├── __init__.py
    │   ├── activations.py
    │   └── mlp.py
    ├── optim
    │   ├── __init__.py
    │   ├── ada_bound.py
    │   ├── ada_delta.py
    │   ├── ada_grad.py
    │   ├── ada_max.py
    │   ├── adam.py
    │   ├── ams_grad.py
    │   ├── average.py
    │   ├── base.py
    │   ├── ftrl.py
    │   ├── initializers.py
    │   ├── losses.py
    │   ├── momentum.py
    │   ├── nadam.py
    │   ├── nesterov.py
    │   ├── newton.py
    │   ├── rms_prop.py
    │   ├── schedulers.py
    │   ├── sgd.py
    │   └── test_.py
    ├── preprocessing
    │   ├── __init__.py
    │   ├── feature_hasher.py
    │   ├── impute.py
    │   ├── lda.py
    │   ├── one_hot.py
    │   ├── ordinal.py
    │   ├── pred_clipper.py
    │   ├── random_projection.py
    │   ├── scale.py
    │   ├── scale_target.py
    │   ├── test_lda.py
    │   ├── test_random_projection.py
    │   └── test_scale.py
    ├── proba
    │   ├── __init__.py
    │   ├── base.py
    │   ├── beta.py
    │   ├── gaussian.py
    │   ├── multinomial.py
    │   └── test_gaussian.py
    ├── py.typed
    ├── reco
    │   ├── __init__.py
    │   ├── base.py
    │   ├── baseline.py
    │   ├── biased_mf.py
    │   ├── funk_mf.py
    │   └── normal.py
    ├── rules
    │   ├── __init__.py
    │   ├── amrules.py
    │   └── base.py
    ├── sketch
    │   ├── __init__.py
    │   ├── counter.py
    │   ├── heavy_hitters.py
    │   ├── histogram.py
    │   └── set.py
    ├── stats
    │   ├── __init__.py
    │   ├── _rust_stats.pyi
    │   ├── auto_corr.py
    │   ├── base.py
    │   ├── count.py
    │   ├── cov.py
    │   ├── entropy.py
    │   ├── ewmean.py
    │   ├── ewvar.py
    │   ├── iqr.py
    │   ├── kolmogorov_smirnov.py
    │   ├── kurtosis.py
    │   ├── link.py
    │   ├── mad.py
    │   ├── maximum.py
    │   ├── mean.py
    │   ├── minimum.py
    │   ├── mode.py
    │   ├── n_unique.py
    │   ├── pearson.py
    │   ├── ptp.py
    │   ├── quantile.py
    │   ├── sem.py
    │   ├── shift.py
    │   ├── skew.py
    │   ├── summing.py
    │   ├── test_kolmogorov_smirnov.py
    │   ├── test_parallel.py
    │   ├── test_quantile.py
    │   ├── test_stats.py
    │   ├── test_var.py
    │   └── var.py
    ├── stream
    │   ├── __init__.py
    │   ├── cache.py
    │   ├── iter_arff.py
    │   ├── iter_array.py
    │   ├── iter_csv.py
    │   ├── iter_libsvm.py
    │   ├── iter_pandas.py
    │   ├── iter_polars.py
    │   ├── iter_sklearn.py
    │   ├── iter_sql.py
    │   ├── iter_vaex.py
    │   ├── pokedb.zip
    │   ├── qa.py
    │   ├── shuffling.py
    │   ├── test_iter_csv.py
    │   ├── test_sql.py
    │   ├── tweet_stream.py
    │   ├── twitch_chat_stream.py
    │   └── utils.py
    ├── test_estimators.py
    ├── time_series
    │   ├── __init__.py
    │   ├── base.py
    │   ├── evaluate.py
    │   ├── holt_winters.py
    │   ├── metrics.py
    │   ├── snarimax.py
    │   ├── test_evaluate.py
    │   ├── test_holt_winters.py
    │   └── test_snarimax.py
    ├── tree
    │   ├── __init__.py
    │   ├── base.py
    │   ├── extremely_fast_decision_tree.py
    │   ├── hoeffding_adaptive_tree_classifier.py
    │   ├── hoeffding_adaptive_tree_regressor.py
    │   ├── hoeffding_tree.py
    │   ├── hoeffding_tree_classifier.py
    │   ├── hoeffding_tree_regressor.py
    │   ├── isoup_tree_regressor.py
    │   ├── last_classifier.py
    │   ├── losses.py
    │   ├── mondrian
    │   │   ├── __init__.py
    │   │   ├── mondrian_tree.py
    │   │   ├── mondrian_tree_classifier.py
    │   │   ├── mondrian_tree_nodes.py
    │   │   └── mondrian_tree_regressor.py
    │   ├── nodes
    │   │   ├── __init__.py
    │   │   ├── arf_htc_nodes.py
    │   │   ├── arf_htr_nodes.py
    │   │   ├── branch.py
    │   │   ├── efdtc_nodes.py
    │   │   ├── et_nodes.py
    │   │   ├── hatc_nodes.py
    │   │   ├── hatr_nodes.py
    │   │   ├── htc_nodes.py
    │   │   ├── htr_nodes.py
    │   │   ├── isouptr_nodes.py
    │   │   ├── last_nodes.py
    │   │   ├── leaf.py
    │   │   └── sgt_nodes.py
    │   ├── setup.py
    │   ├── split_criterion
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── gini_split_criterion.py
    │   │   ├── hellinger_distance_criterion.py
    │   │   ├── info_gain_split_criterion.py
    │   │   ├── intra_cluster_variance_reduction_split_criterion.py
    │   │   ├── variance_ratio_split_criterion.py
    │   │   └── variance_reduction_split_criterion.py
    │   ├── splitter
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── ebst_splitter.py
    │   │   ├── exhaustive_splitter.py
    │   │   ├── gaussian_splitter.py
    │   │   ├── histogram_splitter.py
    │   │   ├── nominal_splitter_classif.py
    │   │   ├── nominal_splitter_reg.py
    │   │   ├── qo_splitter.py
    │   │   ├── random_splitter.py
    │   │   ├── sgt_quantizer.py
    │   │   └── tebst_splitter.py
    │   ├── stochastic_gradient_tree.py
    │   ├── test_base.py
    │   ├── test_splitter.py
    │   ├── test_trees.py
    │   ├── utils.py
    │   └── viz.py
    └── utils
    │   ├── __init__.py
    │   ├── context_managers.py
    │   ├── inspect.py
    │   ├── math.py
    │   ├── norm.py
    │   ├── param_grid.py
    │   ├── pretty.py
    │   ├── random.py
    │   ├── rolling.py
    │   ├── sorted_window.py
    │   ├── test_math.py
    │   ├── test_param_grid.py
    │   ├── test_rolling.py
    │   ├── test_vectordict.py
    │   ├── vectordict.pyi
    │   └── vectordict.pyx
└── rust_src
    └── lib.rs


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb filter=nbstripout
2 | *.ipynb diff=ipynb
3 | *.ipynb linguist-detectable=false
4 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @MaxHalford @smastelini
2 | river/facto @gbolmier
3 | river/stats @AdilZouitine
4 | river/cluster @hoanganhngo610 @Dennis1989
5 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: MaxHalford
2 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us reproduce and correct the bug
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | <!--
11 | Before submitting a bug, please make sure the issue hasn't been already
12 | addressed by searching through the past issues.
13 | -->
14 | ## Versions
15 | <!--
16 | Describe all the versions that can help us to correct the bug
17 | -->
18 | 
19 | **river version**:
20 | **Python version**:
21 | **Operating system**:
22 | 
23 | ## Describe the bug
24 | <!--
25 | A clear and concise description of the bug.
26 | -->
27 | 
28 | ## Steps/code to reproduce
29 | <!--
30 | Please add a minimal example that we can reproduce the error by running the
31 | code. Be as succinct as possible. In short, we
32 | are going to copy-paste your code and we expect to get the same
33 | result as you.
34 | 
35 | If we need to have access to a particular dataset to reproduce the error,
36 | please attach it (or a reduced version if the file is too big) in a file (csv, json...).
37 | If you are not allowed to share the dataset, please try to create a minimal example with synthetic
38 | data which can allow us to reproduce the bug.
39 | 
40 | If the code is too long, feel free to put it in a public gist and link
41 | it in the issue: https://gist.github.com
42 | -->
43 | 
44 | ```python
45 | # Sample code to reproduce the problem
46 | # Please do your best to provide a Minimal, Reproducible Example: https://stackoverflow.com/help/minimal-reproducible-example
47 | ```
48 | 
49 | 
50 | <!-- Thanks for contributing! -->
51 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/performance_issue.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Performance issue
 3 | about: Provide a reproducible example to debug a performance issue
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | ## Versions
11 | <!--
12 | Describe all the versions that can help us understand your setup
13 | -->
14 | 
15 | **`creme` version**: 
16 | 
17 | **Python version**:
18 | 
19 | ## Describe your task
20 | <!--
21 | A clear and concise description of what you're doing.
22 | -->
23 | 
24 | ## What kind of performance are you expecting?
25 | <!--
26 | This might relate to accuracy and/or speed.
27 | -->
28 | 
29 | ## Steps/code to reproduce
30 | 
31 | <!--
32 | Ideally, this should be copy/pastable in a notebook and should just work out of the box.
33 | -->
34 | 
35 | ```python
36 | # Sample code to reproduce the performance issue
37 | ```
38 | 
39 | ## Necessary data
40 | 
41 | <!--
42 | If you're using a dataset, then please provide us with one.
43 | If the dataset is private, then find some way to anonymize it or provide an alternative dataset.
44 | Understand that we can't help you without being able to reproduce what you're doing.
45 | -->
46 | 
47 | 
48 | <!-- Thanks for contributing! -->
49 | 


--------------------------------------------------------------------------------
/.github/actions/install-env/action.yml:
--------------------------------------------------------------------------------
 1 | name: Install env
 2 | 
 3 | inputs:
 4 |   python-version:
 5 |     description: "Python version to use"
 6 |     required: true
 7 |   build-root:
 8 |     default: "true"
 9 |     options:
10 |       - true
11 |       - false
12 | 
13 | runs:
14 |   using: "composite"
15 |   steps:
16 |     - name: Check out repository
17 |       uses: actions/checkout@v4
18 | 
19 |     - name: Set up Python
20 |       id: set-up-python
21 |       uses: actions/setup-python@v5
22 |       with:
23 |         python-version: ${{ inputs.python-version }}
24 | 
25 |     # Getting errors since using not the latest Python version in docs workflows
26 |     # - name: Load cached Poetry installation
27 |     #   uses: actions/cache@v4
28 |     #   with:
29 |     #     path: ~/.local # the path depends on the OS
30 |     #     key: poetry-3  # modify to reset cache
31 | 
32 |     - name: Install poetry
33 |       uses: snok/install-poetry@v1
34 |       with:
35 |         virtualenvs-create: true
36 |         virtualenvs-in-project: true
37 |         installer-parallel: true
38 | 
39 |     - name: Load cached virtual env
40 |       id: cached-poetry-dependencies
41 |       uses: actions/cache@v4
42 |       with:
43 |         path: .venv
44 |         key: venv-${{ runner.os }}-${{ steps.set-up-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
45 | 
46 |     - name: Install dependencies
47 |       shell: bash
48 |       if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
49 |       run: poetry install --no-interaction --no-ansi --no-root
50 | 
51 |     - name: Build
52 |       shell: bash
53 |       if: ${{ inputs.build-root == 'true' }}
54 |       run: poetry install --no-interaction --no-ansi
55 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | <!--
 2 | READ ME!
 3 | 
 4 | Thanks for contributing to River!
 5 | 
 6 | If you're new to the project, then we encourage you to first [open a discussion](https://github.com/online-ml/river/discussions/new). This helps everyone save time by making sure we're all aligned on the contribution that is being made.
 7 | 
 8 | Have a great day.
 9 | -->
10 | 


--------------------------------------------------------------------------------
/.github/workflows/code-quality.yml:
--------------------------------------------------------------------------------
 1 | name: code-quality
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches:
 6 |       - "*"
 7 |   push:
 8 |     branches:
 9 |       - main
10 | 
11 | jobs:
12 |   ubuntu:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@v4
16 | 
17 |       - name: Build River
18 |         uses: ./.github/actions/install-env
19 |         with:
20 |           python-version: "3.13"
21 |           build-root: false
22 | 
23 |       - name: MyPy type check
24 |         run: poetry run mypy
25 | 
26 |       - name: Ruff code linting
27 |         run: poetry run ruff check --output-format=github river/
28 | 
29 |       - name: Ruff code formatting
30 |         run: poetry run ruff format --check river/
31 | 


--------------------------------------------------------------------------------
/.github/workflows/delete-caches.yml:
--------------------------------------------------------------------------------
 1 | name: Clear all Github Actions caches
 2 | on:
 3 |   workflow_dispatch:
 4 |   schedule:
 5 |     - cron: "0 0 * * 0"
 6 | 
 7 | jobs:
 8 |   my-job:
 9 |     name: Delete all caches
10 |     runs-on: ubuntu-latest
11 | 
12 |     steps:
13 |       - name: Clear caches
14 |         uses: easimon/wipe-cache@main
15 | 


--------------------------------------------------------------------------------
/.github/workflows/dev-docs.yml:
--------------------------------------------------------------------------------
 1 | name: dev-docs
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 | jobs:
 9 |   docs:
10 |     runs-on: ubuntu-latest
11 | 
12 |     steps:
13 |       - uses: actions/checkout@v4
14 | 
15 |       - name: Build River
16 |         uses: ./.github/actions/install-env
17 |         with:
18 |           # Use 3.12 for the docs env waiting for spaCy and srsly to support 3.13
19 |           python-version: "3.12"
20 |           build-root: false
21 | 
22 |       - name: Install extra Ubuntu dependencies
23 |         run: sudo apt-get install graphviz pandoc
24 | 
25 |       - name: Install extra Python dependencies
26 |         run: |
27 |           poetry install --with docs
28 | 
29 |       - name: Build docs
30 |         run: |
31 |           source $VENV
32 |           make doc
33 | 
34 |       - name: Deploy docs
35 |         env:
36 |           GH_TOKEN: ${{ secrets.GitHubToken }}
37 |         run: |
38 |           source $VENV
39 |           git config user.name github-actions
40 |           git config user.email github-actions@github.com
41 |           git config pull.rebase false
42 |           git add --all
43 |           git commit -m "Release dev docs" --allow-empty
44 |           git fetch
45 |           git checkout gh-pages
46 |           git pull
47 |           git checkout main
48 |           mike deploy dev --push --remote https://github.com/${{ github.repository }}.git
49 | 


--------------------------------------------------------------------------------
/.github/workflows/unit-tests.yml:
--------------------------------------------------------------------------------
 1 | name: unit-tests
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches:
 6 |       - "*"
 7 |   push:
 8 |     branches:
 9 |       - main
10 | 
11 | jobs:
12 |   run:
13 |     strategy:
14 |       matrix:
15 |         os: [ubuntu-latest]
16 |         python-version: ["3.13", "3.12", "3.11", "3.10"]
17 | 
18 |     runs-on: ${{ matrix.os }}
19 | 
20 |     steps:
21 |       - uses: actions/checkout@v4
22 | 
23 |       - name: Build River
24 |         uses: ./.github/actions/install-env
25 |         with:
26 |           python-version: ${{ matrix.python-version }}
27 | 
28 |       - name: Cache River datasets
29 |         uses: actions/cache@v4
30 |         with:
31 |           path: ~/river_data
32 |           key: ${{ runner.os }}
33 | 
34 |       - name: Cache scikit-learn datasets
35 |         uses: actions/cache@v4
36 |         with:
37 |           path: ~/scikit_learn_data
38 |           key: ${{ runner.os }}
39 | 
40 |       - name: Download datasets
41 |         run: |
42 |           poetry run python -c "from river import datasets; datasets.CreditCard().download(); datasets.Elec2().download(); datasets.SMSSpam().download()"
43 |           poetry run python -c "from river import bandit; bandit.datasets.NewsArticles().download()"
44 | 
45 |       - name: pytest
46 |         run: |
47 |           poetry run pytest -m "not datasets" --durations=10 -n logical # Run pytest on all logical CPU cores
48 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | files: river
 2 | repos:
 3 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 4 |     rev: v4.4.0
 5 |     hooks:
 6 |       - id: check-json
 7 |       - id: check-yaml
 8 | 
 9 |   - repo: https://github.com/astral-sh/ruff-pre-commit
10 |     # Ruff version, should be the same as in poetry.lock
11 |     rev: v0.4.10
12 |     hooks:
13 |       # Run the linter.
14 |       - id: ruff
15 |         types_or: [python, pyi, jupyter]
16 |         args: [--fix]
17 |       # Run the formatter.
18 |       - id: ruff-format
19 |         types_or: [python, pyi, jupyter]
20 | 
21 |   - repo: https://github.com/pre-commit/mirrors-mypy
22 |     # MyPy version, should be the same as in poetry.lock
23 |     rev: v1.13.0
24 |     hooks:
25 |       - id: mypy
26 |         args:
27 |           - "--config-file=pyproject.toml"
28 |           - "--python-version=3.11"
29 |           - "--implicit-optional"
30 | 


--------------------------------------------------------------------------------
/CITATION.bib:
--------------------------------------------------------------------------------
1 | @article{montiel2021river,
2 |   title={River: machine learning for streaming data in Python},
3 |   author={Montiel, Jacob and Halford, Max and Mastelini, Saulo Martiello and Bolmier, Geoffrey and Sourty, Raphael and Vaysse, Robin and Zouitine, Adil and Gomes, Heitor Murilo and Read, Jesse and Abdessalem, Talel and others},
4 |   year={2021}
5 | }
6 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "river"
 3 | version = "0.1.0"
 4 | authors = ["Adil Zouitine <adilzouitinegm@gmail.com>"]
 5 | edition = "2021"
 6 | 
 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 8 | [lib]
 9 | name = "river"
10 | path = "rust_src/lib.rs"
11 | crate-type = ["cdylib"]
12 | 
13 | [dependencies]
14 | pyo3 = { version = "0.23.1", features = ["extension-module"] }
15 | watermill = "0.1.1"
16 | bincode = "1.3.3"
17 | serde = { version = "1.0", features = ["derive"] }
18 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2020, the river developers
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | COMMIT_HASH := $(shell eval git rev-parse HEAD)
 2 | 
 3 | format:
 4 | 	pre-commit run --all-files
 5 | 
 6 | execute-notebooks:
 7 | 	jupyter nbconvert --execute --to notebook --inplace docs/introduction/*/*.ipynb --ExecutePreprocessor.timeout=-1
 8 | 	jupyter nbconvert --execute --to notebook --inplace docs/recipes/*.ipynb --ExecutePreprocessor.timeout=-1
 9 | 	jupyter nbconvert --execute --to notebook --inplace docs/examples/*.ipynb --ExecutePreprocessor.timeout=-1
10 | 	jupyter nbconvert --execute --to notebook --inplace docs/examples/*/*.ipynb --ExecutePreprocessor.timeout=-1
11 | 
12 | render-notebooks:
13 | 	jupyter nbconvert --to markdown docs/introduction/*/*.ipynb
14 | 	jupyter nbconvert --to markdown docs/recipes/*.ipynb
15 | 	jupyter nbconvert --to markdown docs/examples/*.ipynb
16 | 	jupyter nbconvert --to markdown docs/examples/*/*.ipynb
17 | 
18 | doc: render-notebooks
19 | 	(cd benchmarks && python render.py)
20 | 	python docs/parse river --out docs --verbose
21 | 	mkdocs build
22 | 
23 | livedoc: doc
24 | 	mkdocs serve --dirtyreload
25 | 
26 | rebase:
27 | 	git fetch && git rebase origin/main
28 | 


--------------------------------------------------------------------------------
/benchmarks/README.md:
--------------------------------------------------------------------------------
 1 | # Benchmarks
 2 | 
 3 | ## Installation
 4 | 
 5 | The recommended way to run the benchmarks is to create a dedicated environment for river and its contenders.
 6 | 
 7 | An easy way to achieve that is through [Anaconda](https://docs.conda.io/projects/miniconda/en/latest/). Here is an example of creating an environment for the benchmarks:
 8 | 
 9 | ```sh
10 | conda create --name river-benchmark python=3.10
11 | ```
12 | 
13 | The next step is to clone river if you have not done that already:
14 | 
15 | ```sh
16 | git clone https://github.com/online-ml/river
17 | cd river
18 | ```
19 | 
20 | From the river folder you can run the following command to install the needed dependencies:
21 | 
22 | ```sh
23 | pip install ".[benchmarks]"
24 | ```
25 | 
26 | ## Usage
27 | 
28 | The `run.py` script executes the benchmarks and creates the necessary .csv files for rendering the plots.
29 | 
30 | ```sh
31 | cd benchmarks
32 | python run.py
33 | ```
34 | 
35 | The `render.py` renders the plots from the .csv files and moves them to the `docs/benchmarks` folder.
36 | 
37 | ```sh
38 | python render.py
39 | ```
40 | 
41 | ## Notes: VolpalWabbit
42 | 
43 | Installing Volpal Wabbit (VW) can be tricky sometimes. That is especially true when using apple silicon. If cannot make the pip install guidelines from VW work a workaround is the following. When using anaconda, you can install the recommended dependencies utilized for building VW with conda. You can get more info [here](https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Building#conda) about such dependencies. After that, `pip install volpalwabbit` should work just fine.
44 | 


--------------------------------------------------------------------------------
/benchmarks/model_adapters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/benchmarks/model_adapters/__init__.py


--------------------------------------------------------------------------------
/benchmarks/model_adapters/vw.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from vowpalwabbit import pyvw
 4 | 
 5 | from river import base
 6 | 
 7 | 
 8 | class VW2RiverBase:
 9 |     def __init__(self, *args, **kwargs):
10 |         self.vw = pyvw.Workspace(*args, **kwargs)
11 | 
12 |     def _format_x(self, x):
13 |         return " ".join(f"{k}:{v}" for k, v in x.items())
14 | 
15 | 
16 | class VW2RiverClassifier(VW2RiverBase, base.Classifier):
17 |     def learn_one(self, x, y):
18 |         # Convert {False, True} to {-1, 1}
19 |         y = int(y)
20 |         y_vw = 2 * y - 1
21 | 
22 |         ex = self._format_x(x)
23 |         ex = f"{y_vw} | {ex}"
24 |         self.vw.learn(ex)
25 | 
26 |     def predict_proba_one(self, x):
27 |         ex = "| " + self._format_x(x)
28 |         y_pred = self.vw.predict(ex)
29 |         return {True: y_pred, False: 1.0 - y_pred}
30 | 


--------------------------------------------------------------------------------
/build.py:
--------------------------------------------------------------------------------
 1 | import platform
 2 | 
 3 | import numpy
 4 | import setuptools
 5 | from Cython.Build import cythonize
 6 | from setuptools.command.build_ext import build_ext
 7 | from setuptools.errors import CCompilerError
 8 | from setuptools_rust import Binding, RustExtension
 9 | 
10 | ext_modules = cythonize(
11 |     module_list=[
12 |         setuptools.Extension(
13 |             "*",
14 |             sources=["river/**/*.pyx"],
15 |             include_dirs=[numpy.get_include()],
16 |             libraries=[] if platform.system() == "Windows" else ["m"],
17 |             define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
18 |         )
19 |     ],
20 |     compiler_directives={
21 |         "binding": True,
22 |         "embedsignature": True,
23 |     },
24 | )
25 | 
26 | rust_extensions = [RustExtension("river.stats._rust_stats", binding=Binding.PyO3)]
27 | 
28 | 
29 | class BuildFailed(Exception):
30 |     pass
31 | 
32 | 
33 | class ExtBuilder(build_ext):
34 |     def run(self):
35 |         try:
36 |             build_ext.run(self)
37 |         except (FileNotFoundError):
38 |             raise BuildFailed("File not found. Could not compile C extension.")
39 | 
40 |     def build_extension(self, ext):
41 |         try:
42 |             build_ext.build_extension(self, ext)
43 |         except (CCompilerError, ValueError):
44 |             raise BuildFailed("Could not compile C extension.")
45 | 
46 | 
47 | def build(setup_kwargs):
48 |     """
49 |     This function is mandatory in order to build the extensions.
50 |     """
51 |     setup_kwargs.update(
52 |         {
53 |             "ext_modules": ext_modules,
54 |             "cmdclass": {"build_ext": ExtBuilder},
55 |             "rust_extensions": rust_extensions,
56 |             "zip_safe": False,
57 |             "include_package_data": True,
58 |         }
59 |     )
60 | 


--------------------------------------------------------------------------------
/docs/.pages:
--------------------------------------------------------------------------------
 1 | nav:
 2 |   - introduction
 3 |   - recipes
 4 |   - api
 5 |   - examples
 6 |   - faq
 7 |   - releases
 8 |   - benchmarks
 9 |   - license
10 | 


--------------------------------------------------------------------------------
/docs/CNAME:
--------------------------------------------------------------------------------
1 | riverml.xyz
2 | 


--------------------------------------------------------------------------------
/docs/benchmarks/.pages:
--------------------------------------------------------------------------------
1 | title: Benchmarks
2 | 


--------------------------------------------------------------------------------
/docs/css/version-select.css:
--------------------------------------------------------------------------------
1 | @media only screen and (max-width:76.1875em) {
2 |   #version-selector {
3 |     padding: .6rem .8rem;
4 |   }
5 | }
6 | 


--------------------------------------------------------------------------------
/docs/examples/.pages:
--------------------------------------------------------------------------------
1 | title: Examples 🌶️
2 | 


--------------------------------------------------------------------------------
/docs/examples/matrix-factorization-for-recommender-systems/.pages:
--------------------------------------------------------------------------------
1 | title: Matrix factorization for recommender systems
2 | 


--------------------------------------------------------------------------------
/docs/examples/matrix-factorization-for-recommender-systems/part-3.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Part 3"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "To do."
15 |    ]
16 |   }
17 |  ],
18 |  "metadata": {
19 |   "kernelspec": {
20 |    "display_name": "Python 3",
21 |    "language": "python",
22 |    "name": "python3"
23 |   },
24 |   "language_info": {
25 |    "codemirror_mode": {
26 |     "name": "ipython",
27 |     "version": 3
28 |    },
29 |    "file_extension": ".py",
30 |    "mimetype": "text/x-python",
31 |    "name": "python",
32 |    "nbconvert_exporter": "python",
33 |    "pygments_lexer": "ipython3",
34 |    "version": "3.11.0"
35 |   }
36 |  },
37 |  "nbformat": 4,
38 |  "nbformat_minor": 4
39 | }
40 | 


--------------------------------------------------------------------------------
/docs/faq/.pages:
--------------------------------------------------------------------------------
1 | title: FAQ
2 | 


--------------------------------------------------------------------------------
/docs/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/img/favicon.ico


--------------------------------------------------------------------------------
/docs/img/favicon_old.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/img/favicon_old.ico


--------------------------------------------------------------------------------
/docs/img/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/img/icon.png


--------------------------------------------------------------------------------
/docs/img/illustration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/img/illustration.png


--------------------------------------------------------------------------------
/docs/img/illustration_old.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/img/illustration_old.png


--------------------------------------------------------------------------------
/docs/img/online_active_learning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/img/online_active_learning.png


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | template: home.html
3 | title: river
4 | ---
5 | 


--------------------------------------------------------------------------------
/docs/introduction/.pages:
--------------------------------------------------------------------------------
1 | title: Introduction 🍼
2 | nav:
3 |   - installation.md
4 |   - basic-concepts.md
5 |   - getting-started
6 |   - why-use-river.md
7 |   - next-steps.md
8 |   - related-projects.md
9 | 


--------------------------------------------------------------------------------
/docs/introduction/getting-started/concept-drift-detection_files/concept-drift-detection_1_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/introduction/getting-started/concept-drift-detection_files/concept-drift-detection_1_0.png


--------------------------------------------------------------------------------
/docs/introduction/getting-started/concept-drift-detection_files/concept-drift-detection_3_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/introduction/getting-started/concept-drift-detection_files/concept-drift-detection_3_1.png


--------------------------------------------------------------------------------
/docs/introduction/installation.md:
--------------------------------------------------------------------------------
 1 | # Installation
 2 | 
 3 | River is meant to work with Python 3.8 and above. Installation can be done via `pip`:
 4 | 
 5 | ```sh
 6 | pip install river
 7 | ```
 8 | 
 9 | You can install the latest development version from GitHub, as so:
10 | 
11 | ```sh
12 | pip install git+https://github.com/online-ml/river --upgrade
13 | pip install git+ssh://git@github.com/online-ml/river.git --upgrade  # using SSH
14 | ```
15 | 
16 | This method requires having Cython and Rust installed on your machine.
17 | 
18 | Feel welcome to [open an issue on GitHub](https://github.com/online-ml/river/issues/new) if you are having any trouble.
19 | 


--------------------------------------------------------------------------------
/docs/introduction/next-steps.md:
--------------------------------------------------------------------------------
 1 | # Next steps
 2 | 
 3 | The [Recipes 🍱](/latest/recipes/reading-data) section is made up of small tutorials. Each one explains how to perform mundane tasks, such as measuring the performance of a model, selecting hyperparameters, etc.
 4 | 
 5 | The [Examples 🌶️](/latest/examples/batch-to-online) section contains more involved notebooks with less explanations. Each notebook addresses a particular machine learning problem.
 6 | 
 7 | The [API 📚](/latest/api) section references all the modules, classes, and functions in River. It is automatically generated from the codebase's Python docstrings.
 8 | 
 9 | Feel welcome to [open a discussion](https://github.com/online-ml/river/discussions) if you have a question. Before that you can check out the [FAQ 🙋](/latest/faq), which has answers to recurring questions.
10 | 
11 | The released versions are listed in the [Releases 🏗](/latest/releases) section. Changes that will be part of the next release are listed in the unreleased section of the documentation's development version, which you may find [here](https://riverml.xyz/latest/releases/unreleased/).
12 | 
13 | We recommend checking out [Awesome Online Machine Learning](https://github.com/online-ml/awesome-online-machine-learning) if you want to go deeper. There you will find online machine learning related content: research papers, alternative and complementary software, blog posts, etc.
14 | 


--------------------------------------------------------------------------------
/docs/introduction/related-projects.md:
--------------------------------------------------------------------------------
1 | # Related projects
2 | 
3 | Here is a list of projects which are more or less coupled with River:
4 | 
5 | - [deep-river](https://github.com/online-ml/deep-river) interfaces PyTorch models with River.
6 | - [light-river](https://github.com/online-ml/light-river) implements fast algorithms in rust. 
7 | - [river-extra](https://github.com/online-ml/river-extra) regroups experimental features which have yet to prove themselves to make it into the main River repository. Between us we call this "the arena".
8 | - [Beaver](https://github.com/online-ml/beaver) is an MLOps tool for covering the whole lifecycle of online machine learning models.
9 | 


--------------------------------------------------------------------------------
/docs/introduction/why-use-river.md:
--------------------------------------------------------------------------------
 1 | # Why use River?
 2 | 
 3 | ## Processing one sample at a time
 4 | 
 5 | All the tools in the library can be updated with a single observation at a time. They can therefore be used to process streaming data. Depending on your use case, this might be more convenient than using a batch model.
 6 | 
 7 | ## Adapting to drift
 8 | 
 9 | In the streaming setting, data can evolve. Adaptive methods are specifically designed to be robust against concept drift in dynamic environments. Many of River's models can cope with concept drift.
10 | 
11 | ## General purpose
12 | 
13 | River supports different machine learning tasks, including regression, classification, and unsupervised learning. It can also be used for ad hoc tasks, such as computing online metrics, as well as concept drift detection.
14 | 
15 | ## User experience
16 | 
17 | River is not the only library allowing you to do online machine learning. But it might just be the simplest one to use in the Python ecosystem. River plays nicely with Python dictionaries, therefore making it easy to use in the context of web applications where JSON payloads are aplenty.
18 | 


--------------------------------------------------------------------------------
/docs/javascripts/config.js:
--------------------------------------------------------------------------------
 1 | window.MathJax = {
 2 |   tex: {
 3 |     inlineMath: [["\\(", "\\)"]],
 4 |     displayMath: [["\\[", "\\]"]],
 5 |     processEscapes: true,
 6 |     processEnvironments: true
 7 |   },
 8 |   options: {
 9 |     ignoreHtmlClass: ".*|",
10 |     processHtmlClass: "arithmatex"
11 |   }
12 | };
13 | 
14 | document$.subscribe(() => {
15 |   MathJax.typesetPromise()
16 | })
17 | 


--------------------------------------------------------------------------------
/docs/javascripts/tablesort.js:
--------------------------------------------------------------------------------
1 | document$.subscribe(function() {
2 |   var tables = document.querySelectorAll("article table:not([class])")
3 |   tables.forEach(function(table) {
4 |     new Tablesort(table)
5 |   })
6 | })
7 | 


--------------------------------------------------------------------------------
/docs/license/.pages:
--------------------------------------------------------------------------------
1 | title: License 📝
2 | 
3 | 


--------------------------------------------------------------------------------
/docs/license/license.md:
--------------------------------------------------------------------------------
1 | # License
2 | 
3 | River is free and open-source software licensed under the [3-clause BSD license](https://github.com/online-ml/river/blob/main/LICENSE).


--------------------------------------------------------------------------------
/docs/overrides/partials/footer.html:
--------------------------------------------------------------------------------
 1 | {% import "partials/language.html" as lang with context %}
 2 | 
 3 | <!-- Application footer -->
 4 | <footer class="md-footer">
 5 | 
 6 |   <!-- Further information -->
 7 |   <div class="md-footer-meta md-typeset">
 8 |     <div class="md-footer-meta__inner md-grid">
 9 | 
10 |       <!-- Copyright and theme information -->
11 |       <div class="md-footer-copyright">
12 |         {% if config.copyright %}
13 |           <div class="md-footer-copyright__highlight">
14 |             {{ config.copyright }}
15 |           </div>
16 |         {% endif %}
17 |         Made with
18 |         <a
19 |           href="https://squidfunk.github.io/mkdocs-material/"
20 |           target="_blank" rel="noopener"
21 |         >
22 |           Material for MkDocs
23 |         </a>
24 |       </div>
25 | 
26 |       <!-- Social links -->
27 |       {% include "partials/social.html" %}
28 |     </div>
29 |   </div>
30 | </footer>
31 | 


--------------------------------------------------------------------------------
/docs/overrides/partials/integrations/analytics.html:
--------------------------------------------------------------------------------
1 | <script
2 |   defer
3 |   src="https://eu.umami.is/script.js"
4 |   data-website-id="b9f84709-bbc5-48b5-b43b-ef0bcc45a1f8"
5 | ></script>
6 | 


--------------------------------------------------------------------------------
/docs/recipes/.pages:
--------------------------------------------------------------------------------
 1 | title: Recipes 🌮
 2 | nav:
 3 |     - reading-data.md
 4 |     - model-evaluation.md
 5 |     - pipelines.md
 6 |     - feature-extraction.md
 7 |     - hyperparameter-tuning.md
 8 |     - mini-batching.md
 9 |     - on-hoeffding-trees.md
10 |     - active-learning.md
11 |     - bandits-101.md
12 |     - cloning-and-mutating.md
13 |     - rolling-computations.md
14 | 


--------------------------------------------------------------------------------
/docs/recipes/feature-extraction.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Feature extraction\n",
 8 |     "\n",
 9 |     "To do."
10 |    ]
11 |   }
12 |  ],
13 |  "metadata": {
14 |   "kernelspec": {
15 |    "display_name": "Python 3",
16 |    "language": "python",
17 |    "name": "python3"
18 |   },
19 |   "language_info": {
20 |    "codemirror_mode": {
21 |     "name": "ipython",
22 |     "version": 3
23 |    },
24 |    "file_extension": ".py",
25 |    "mimetype": "text/x-python",
26 |    "name": "python",
27 |    "nbconvert_exporter": "python",
28 |    "pygments_lexer": "ipython3",
29 |    "version": "3.11.0"
30 |   }
31 |  },
32 |  "nbformat": 4,
33 |  "nbformat_minor": 4
34 | }
35 | 


--------------------------------------------------------------------------------
/docs/recipes/hyperparameter-tuning.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Hyperparameter tuning\n",
 8 |     "\n",
 9 |     "To do."
10 |    ]
11 |   }
12 |  ],
13 |  "metadata": {
14 |   "kernelspec": {
15 |    "display_name": "Python 3",
16 |    "language": "python",
17 |    "name": "python3"
18 |   },
19 |   "language_info": {
20 |    "codemirror_mode": {
21 |     "name": "ipython",
22 |     "version": 3
23 |    },
24 |    "file_extension": ".py",
25 |    "mimetype": "text/x-python",
26 |    "name": "python",
27 |    "nbconvert_exporter": "python",
28 |    "pygments_lexer": "ipython3",
29 |    "version": "3.11.0"
30 |   }
31 |  },
32 |  "nbformat": 4,
33 |  "nbformat_minor": 4
34 | }
35 | 


--------------------------------------------------------------------------------
/docs/recipes/model-evaluation.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Model evaluation\n",
 8 |     "\n",
 9 |     "To do."
10 |    ]
11 |   }
12 |  ],
13 |  "metadata": {
14 |   "kernelspec": {
15 |    "display_name": "Python 3",
16 |    "language": "python",
17 |    "name": "python3"
18 |   },
19 |   "language_info": {
20 |    "codemirror_mode": {
21 |     "name": "ipython",
22 |     "version": 3
23 |    },
24 |    "file_extension": ".py",
25 |    "mimetype": "text/x-python",
26 |    "name": "python",
27 |    "nbconvert_exporter": "python",
28 |    "pygments_lexer": "ipython3",
29 |    "version": "3.11.0"
30 |   }
31 |  },
32 |  "nbformat": 4,
33 |  "nbformat_minor": 4
34 | }
35 | 


--------------------------------------------------------------------------------
/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_21_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_21_0.png


--------------------------------------------------------------------------------
/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_23_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_23_0.png


--------------------------------------------------------------------------------
/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_25_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_25_0.png


--------------------------------------------------------------------------------
/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_27_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_27_0.png


--------------------------------------------------------------------------------
/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_29_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_29_0.png


--------------------------------------------------------------------------------
/docs/releases/.pages:
--------------------------------------------------------------------------------
1 | title: Releases
2 | sort_type: natural
3 | order: desc
4 | 


--------------------------------------------------------------------------------
/docs/releases/0.0.2.md:
--------------------------------------------------------------------------------
 1 | # 0.0.2 - 2019-02-13
 2 | 
 3 | - [PyPI](https://pypi.org/project/river/0.0.2/)
 4 | - [GitHub](https://github.com/online-ml/river/releases/tag/0.0.2)
 5 | 
 6 | ## compat
 7 | 
 8 | - Added `sklearn` wrappers.
 9 | 
10 | ## ensemble
11 | 
12 | - Added `ensemble.HedgeClassifier`.
13 | 
14 | ## feature_selection
15 | 
16 | - Added `feature_selection.RandomDiscarder`.
17 | 
18 | ## feature_extraction
19 | 
20 | - Added `feature_extraction.TargetEncoder`.
21 | 
22 | ## impute
23 | 
24 | - Added `impute.NumericImputer`.
25 | 
26 | ## optim
27 | 
28 | - Added `optim.AbsoluteLoss`.
29 | - Added `optim.HingeLoss`.
30 | - Added `optim.EpsilonInsensitiveHingeLoss`.
31 | 
32 | ## stats
33 | 
34 | - Added `stats.NUnique`.
35 | - Added `stats.Min`.
36 | - Added `stats.Max`.
37 | - Added `stats.PeakToPeak`.
38 | - Added `stats.Kurtosis`.
39 | - Added `stats.Skew`.
40 | - Added `stats.Sum`.
41 | - Added `stats.EWMean`.
42 | - Made sure the running statistics produce the same results as `pandas.DataFrame.rolling` method.
43 | 


--------------------------------------------------------------------------------
/docs/releases/0.10.1.md:
--------------------------------------------------------------------------------
1 | # 0.10.1 - 2022-02-05
2 | 
3 | ## evaluate
4 | 
5 | `evaluate.progressive_val_score` can now handle models which use `**kwargs` in their `learn_one` and `predict_one` methods. For instance, this is useful for `reco.Ranker` models which require passing a user and an item.
6 | 


--------------------------------------------------------------------------------
/docs/releases/0.11.1.md:
--------------------------------------------------------------------------------
1 | # 0.11.1 - 2022-06-06
2 | 
3 | A small release to introduce benchmarks.
4 | 
5 | ## anomaly
6 | 
7 | - Fixed a bug where anomaly filters were never updated.
8 | 


--------------------------------------------------------------------------------
/docs/releases/0.12.1.md:
--------------------------------------------------------------------------------
1 | # 0.12.1 - 2022-09-02
2 | 
3 | ## base
4 | 
5 | - Fix the way the `clone` method handles positional arguments.
6 | 


--------------------------------------------------------------------------------
/docs/releases/0.13.0.md:
--------------------------------------------------------------------------------
 1 | # 0.13.0 - 2022-09-15
 2 | 
 3 | ## compose
 4 | 
 5 | - `compose.TransformerUnion` parts can now be accessed by index as well as by name.
 6 | 
 7 | ## stats
 8 | 
 9 | - Added the `LossyCount` for tracking frequent itemsets. This implementation also supports a forgetting factor to reduce the influence of old elements.
10 | - The following statistics are now implemented in Rust:
11 |   - `Quantile`
12 |   - `EWMean`
13 |   - `EWVar`
14 |   - `IQR`
15 |   - `Kurtosis`
16 |   - `PeaktoPeak`
17 |   - `Skew`
18 |   - `RollingQuantile`
19 |   - `RollingIQR`
20 | 
21 | ## stream
22 | 
23 | - Implemented `stream.TwitchChatStream`.
24 | 


--------------------------------------------------------------------------------
/docs/releases/0.14.0.md:
--------------------------------------------------------------------------------
 1 | # 0.14.0 - 2022-10-26
 2 | 
 3 | - Introducing the `bandit` module for running multi-armed bandits
 4 | - Introducing the `sketch` module with summarization tools and data sketches working in a streaming fashion!
 5 | 
 6 | ## bandit
 7 | 
 8 | - Added `bandit.EpsilonGreedy`.
 9 | - Added `bandit.UCB`.
10 | - Added `bandit.ThomsonSampling`.
11 | - Added a `bandit.base` module.
12 | - Added `bandit.envs.CandyCaneContest`, which implements the Gym interface.
13 | - Added `bandit.envs.KArmedTestbed`, which implements the Gym interface.
14 | - Added `bandit.evaluate` for basic benchmarking of bandit policies on a Gym environment.
15 | 
16 | ## drift
17 | 
18 | - Exposed more parameters in ADWIN: `clock`, `max_buckets`, `min_window_length`, and `grace_period`.
19 | 
20 | ## model_selection
21 | 
22 | - Added `model_selection.BanditRegressor`, which is a generic model selection method that works with any bandit policy.
23 | - Removed `model_selection.EpsilonGreedyRegressor` due to the addition of `model_selection.BanditRegressor`.
24 | - Removed `model_selection.UCBRegressor` due to the addition of `model_selection.BanditRegressor`.
25 | 
26 | ## proba
27 | 
28 | - Added `proba.Beta`.
29 | - Added a `sample` method to each distribution.
30 | - Added a `mode` property to each distribution.
31 | - Replaced the `pmf` and `pdf` methods with a `__call__` method.
32 | 
33 | ## sketch
34 | 
35 | - Moved `misc.Histogram` to `sketch.Histogram`.
36 | - Moved `stats.LossyCount` to `sketch.HeavyHitters` and update its API to better match `collections.Counter`.
37 | - Added missing return `self` in `HeavyHitters`.
38 | - Added the Count-Min Sketch (`sketch.Counter`) algorithm for approximate element counting.
39 | - Added an implementation of Bloom filter (`sketch.Set`) to provide approximate set-like operations.
40 | 


--------------------------------------------------------------------------------
/docs/releases/0.16.0.md:
--------------------------------------------------------------------------------
 1 | # 0.16.0 - 2023-05-08
 2 | 
 3 | Added wheels for Python 3.11.
 4 | 
 5 | ## feature_extraction
 6 | 
 7 | - `feature_extraction.Agg` and `feature_extraction.TargetAgg` can now be passed an optional `t` in its `learn_one` method, which allows it to work with `utils.TimeRolling`.
 8 | 
 9 | ## metrics
10 | 
11 | - Added `metrics.MAPE`.
12 | - Added `metrics.RollingROCAUC`.
13 | 
14 | ## preprocessing
15 | 
16 | - Added `preprocessing.GaussianRandomProjector`.
17 | - Added `preprocessing.SparseRandomProjector`.
18 | 
19 | ## stats
20 | 
21 | - Fixed randomness issue with the first few outputs of `stats.Quantile`.
22 | 


--------------------------------------------------------------------------------
/docs/releases/0.17.0.md:
--------------------------------------------------------------------------------
 1 | # 0.17.0 - 2023-05-27
 2 | 
 3 | ## bandit
 4 | 
 5 | - Bandit policies now return a single arm when the `pull` method is called, instead of yielding or one more arms at a time. This is simpler to understand. We will move back to multi-armed pulls in the future.
 6 | - Added `bandit.Exp3`.
 7 | - `bandit.UCB` and `bandit.Exp3` have an extra `reward_scaler` parameter, which can be any object that inherits from `compose.TargetTransformRegressor`. This allows scaling rewards before updating arms.
 8 | 
 9 | ## compose
10 | 
11 | - `compose.TransformerProduct` now correctly returns a `compose.TransformerUnion` when a transformer is added to it.
12 | - Fixed `compose.TransformerProduct`'s `transform_many` behavior.
13 | - `compose.TransformerUnion` and `compose.TransformerProduct` will now clone the provided estimators, so that shallow copies aren't shared in different places.
14 | 
15 | ## model_selection
16 | 
17 | - Added `model_selection.BanditClassifier`, which is the classification equivalent to `bandit.BanditRegressor`. Both are methods to perform online model selection via a bandit policy.
18 | 
19 | ## multioutput
20 | 
21 | - `metrics.multioutput.MacroAverage` and `metrics.multioutput.MicroAverage` now loop over the keys of `y_true` instead of `y_pred`. This ensures a `KeyError` is correctly raised if `y_pred` is missing an output that is present in `y_true`.
22 | 
23 | ## preprocessing
24 | 
25 | - Added `preprocessing.TargetMinMaxScaler`, which operates the same as `preprocessing.TargetStandardScaler`, but instead uses min-max scaling.
26 | 


--------------------------------------------------------------------------------
/docs/releases/0.20.1.md:
--------------------------------------------------------------------------------
1 | # 0.20.1 - 2023-11-09
2 | 
3 | Dummy release to make wheels available. No actual difference with v0.20.0.
4 | 


--------------------------------------------------------------------------------
/docs/releases/0.21.0.md:
--------------------------------------------------------------------------------
 1 | # 0.21.0 - 2023-12-04
 2 | 
 3 | - The `learn_one` and `learn_many` methods of each estimator don't not return anything anymore. This is to emphasize that the estimators are stateful.
 4 | - The `update` and `revert` method of classes that have also cease to return anything.
 5 | - `sample_weight` has been renamed to `w`.
 6 | 
 7 | ## covariance
 8 | 
 9 | - Fixed an issue where `update_many` would reset `covariance.EmpiricalCovariance` each time it was called.
10 | 


--------------------------------------------------------------------------------
/docs/releases/0.21.1.md:
--------------------------------------------------------------------------------
 1 | # 0.21.1 - 2024-03-28
 2 | 
 3 | This release should fix some of the installation issues when building the River wheel from scratch.
 4 | 
 5 | ## anomaly
 6 | 
 7 | - Added `PredictiveAnomalyDetection`, a semi-supervised technique that employs a predictive model for anomaly detection.
 8 | 
 9 | ## drift
10 | 
11 | - Added `FHDDM` drift detector.
12 | - Added a `iter_polars` function to iterate over the rows of a polars DataFrame.
13 | 
14 | ## neighbors
15 | 
16 | - Simplified `neighbors.SWINN` to avoid recursion limit and pickling issues.
17 | 


--------------------------------------------------------------------------------
/docs/releases/0.21.2.md:
--------------------------------------------------------------------------------
 1 | # 0.21.2 - 2024-07-08
 2 | 
 3 | This release makes Polars an optional dependency instead of a required one.
 4 | 
 5 | ## cluster
 6 | 
 7 | - Added `ODAC` (Online Divisive-Agglomerative Clustering) for clustering time series.
 8 | 
 9 | ## forest
10 | 
11 | - Fix error in `forest.ARFClassifer` and `forest.ARFRegressor` where the algorithms would crash in case the number of features available for learning went below the value of the `max_features` parameter (#1560).
12 | 


--------------------------------------------------------------------------------
/docs/releases/0.22.0.md:
--------------------------------------------------------------------------------
 1 | # 0.22.0 - 2024-11-23
 2 | 
 3 | - Dropped support for Python 3.9 and added support for Python 3.13.
 4 | - The methods `learn_one`, `learn_many`, `update`, `revert`, and `append` now return `None`.
 5 | - The units used in River have been corrected to be based on powers of 2 (KiB, MiB). This only changes the display, the behaviour is unchanged.
 6 | 
 7 | ## cluster
 8 | 
 9 | - Update the description of `cluster.ODAC`.
10 | - Change `draw` in `cluster.ODAC` to draw the hierarchical cluster's structure as a Graphviz graph.
11 | - Add `render_ascii` in `cluster.ODAC` to render the hierarchical cluster's structure in text format.
12 | - Work with `stats.Var` in `cluster.ODAC` when cluster has only one time series.
13 | 
14 | ## drift
15 | 
16 | - Make `drift.ADWIN` comply with the reference MOA implementation.
17 | 
18 | ## feature extraction
19 | 
20 | - The mini-batch methods for `feature_extraction.TFIDF` now systematically raise an exception, as they are not implemented.
21 | 
22 | ## stats
23 | 
24 | - Removed the unexported class `stats.CentralMoments`.
25 | 
26 | ## tree
27 | 
28 | - Instead of letting trees grow indefinitely, setting the `max_depth` parameter to `None` will stop the trees from growing when they reach the system recursion limit.
29 | - Added `tree.LASTClassifier` (Local Adaptive Streaming Tree Classifier).
30 | 
31 | ## stream
32 | 
33 | - `stream.iter_arff` now supports blank values (treated as missing values).
34 | 


--------------------------------------------------------------------------------
/docs/releases/0.3.0.md:
--------------------------------------------------------------------------------
 1 | # 0.3.0 - 2019-06-23
 2 | 
 3 | - [PyPI](https://pypi.org/project/river/0.3.0/)
 4 | - [GitHub](https://github.com/online-ml/river/releases/tag/0.3.0)
 5 | 
 6 | ## datasets
 7 | 
 8 | - Added `datasets.load_chick_weights`.
 9 | 
10 | ## decomposition
11 | 
12 | - Added `decomposition.LDA`.
13 | 
14 | ## ensemble
15 | 
16 | - Added `ensemble.HedgeRegressor`.
17 | - Added `ensemble.StackingBinaryClassifier`.
18 | 
19 | ## metrics
20 | 
21 | - Added `metrics.FBeta`
22 | - Added `metrics.MacroFBeta`
23 | - Added `metrics.MicroFBeta`
24 | - Added `metrics.MultiFBeta`
25 | - Added `metrics.RollingFBeta`
26 | - Added `metrics.RollingMacroFBeta`
27 | - Added `metrics.RollingMicroFBeta`
28 | - Added `metrics.RollingMultiFBeta`
29 | - Added `metrics.Jaccard`
30 | - Added `metrics.RollingConfusionMatrix`
31 | - Added `metrics.RegressionMultiOutput`
32 | - Added `metrics.MCC`
33 | - Added `metrics.RollingMCC`
34 | - Added `metrics.ROCAUC`
35 | - Renamed `metrics.F1Score` to `metrics.F1`.
36 | 
37 | ## multioutput
38 | 
39 | - Added `multioutput.ClassifierChain`.
40 | - Added `multioutput.RegressorChain`.
41 | 
42 | ## optim
43 | 
44 | - Added `optim.QuantileLoss`
45 | - Added `optim.MiniBatcher`.
46 | 
47 | ## preprocessing
48 | 
49 | - Added `preprocessing.Normalizer`.
50 | 
51 | ## proba
52 | 
53 | - Added `proba.Multinomial`.
54 | 


--------------------------------------------------------------------------------
/docs/releases/0.4.3.md:
--------------------------------------------------------------------------------
 1 | # 0.4.3 - 2019-10-27
 2 | 
 3 | - [PyPI](https://pypi.org/project/river/0.4.3/)
 4 | - [GitHub](https://github.com/online-ml/river/releases/tag/0.4.3)
 5 | 
 6 | ## base
 7 | 
 8 | - Model that inherit from `base.Wrapper` (e.g. `tree.RandomForestClassifier`) can now be pickled.
 9 | 
10 | ## datasets
11 | 
12 | - Added `datasets.fetch_credit_card`.
13 | 
14 | ## utils
15 | 
16 | - Added the `utils.math` sub-module.
17 | 
18 | ## tree
19 | 
20 | - Fixed the `debug_one` method of `tree.DecisionTreeClassifier`.
21 | 


--------------------------------------------------------------------------------
/docs/releases/0.4.4.md:
--------------------------------------------------------------------------------
 1 | # 0.4.4 - 2019-11-11
 2 | 
 3 | - [PyPI](https://pypi.org/project/river/0.4.4/)
 4 | - [GitHub](https://github.com/online-ml/river/releases/tag/0.4.4)
 5 | 
 6 | This release was mainly made to provide access to `wheels <https://pythonwheels.com/>`_ for Windows and MacOS.
 7 | 
 8 | ## ensemble
 9 | 
10 | - Added `ensemble.AdaBoostClassifier`.
11 | 
12 | ## linear_model
13 | 
14 | - Added a `clip_gradient` parameter to `linear_model.LinearRegression` and `linear_model.LogisticRegression`. Gradient clipping was already implemented, but the maximum absolute value can now be set by the user.
15 | - The `intercept_lr` parameter of `linear_model.LinearRegression` and `linear_model.LogisticRegression` can now be passed an instance of `optim.schedulers.Scheduler` as well as a `float`.
16 | 
17 | ## metrics
18 | 
19 | - Fixed `metrics.SMAPE`, the implementation was missing a multiplication by 2.
20 | 
21 | ## optim
22 | 
23 | - Added `optim.schedulers.Optimal` produces results that are identical to `sklearn.linear_model.SGDRegressor` and `sklearn.linear_model.SGDClassifier` when setting their `learning_rate` parameter to `'optimal'`.
24 | 
25 | ## time_series
26 | 
27 | - Added `time_series.SNARIMAX`, a generic model which encompasses well-known time series models such as ARIMA and NARX.
28 | 


--------------------------------------------------------------------------------
/docs/releases/0.5.1.md:
--------------------------------------------------------------------------------
 1 | # 0.5.1 - 2020-03-29
 2 | 
 3 | - [PyPI](https://pypi.org/project/river/0.5.1/)
 4 | - [GitHub](https://github.com/online-ml/river/releases/tag/0.5.1)
 5 | 
 6 | ## compose
 7 | 
 8 | - `compose.Pipeline` and `compose.TransformerUnion` now variadic arguments as input instead of a list. This doesn't change anything when using the shorthand operators `|` and `+`.
 9 | 
10 | ## model_selection
11 | 
12 | - Removed `model_selection.successive_halving`
13 | - Added `model_selection.SuccessiveHalvingRegressor` and `model_selection.SuccessiveHalvingClassifier`
14 | 
15 | ## stream
16 | 
17 | - Added a `copy` parameter to `stream.simulate_qa` in order to handle unwanted feature modifications.
18 | 
19 | ## tree
20 | 
21 | - Added a `curtail_under` parameter to `tree.DecisionTreeClassifier`.
22 | - The speed and accuracy of both `tree.DecisionTreeClassifier` and `tree.RandomForestClassifier` has been slightly improved for numerical attributes.
23 | - The esthetics of the `tree.DecisionTreeClassifier.draw` method have been improved.
24 | 


--------------------------------------------------------------------------------
/docs/releases/0.6.1.md:
--------------------------------------------------------------------------------
1 | # 0.6.1 - 2020-06-10
2 | 
3 | ## compose
4 | 
5 | - Fixed a bug that occurred when part of a `compose.Transformer` was a `compose.Pipeline` and wasn't properly handled.
6 | 


--------------------------------------------------------------------------------
/docs/releases/0.7.0.md:
--------------------------------------------------------------------------------
1 | # 0.7.0 - 2021-04-16
2 | 
3 | Alas, no release notes for this one.
4 | 


--------------------------------------------------------------------------------
/docs/releases/0.7.1.md:
--------------------------------------------------------------------------------
 1 | # 0.7.1 - 2021-06-13
 2 | 
 3 | Fixed an issue where scikit-learn was imported in `sam_knn.py` but wasn't specified as a dependency.
 4 | 
 5 | ## expert
 6 | 
 7 | - Each expert model will now raise a `NotEnoughModels` exception if only a single model is passed.
 8 | 
 9 | ## stream
10 | 
11 | - Added `drop_nones` parameter to `stream.iter_csv`.
12 | 


--------------------------------------------------------------------------------
/docs/releases/0.8.0.md:
--------------------------------------------------------------------------------
 1 | # 0.8.0 - 2021-08-31
 2 | 
 3 | ## base
 4 | 
 5 | - The `predict_many` and `predict_proba_many` methods have been removed from `base.Classifier`. They're part of `base.MiniBatchClassifier`.
 6 | 
 7 | ## ensemble
 8 | 
 9 | - Implemented `ensemble.VotingClassifier`.
10 | - Implemented `ensemble.SRPRegressor`.
11 | 
12 | ## meta
13 | 
14 | - Renamed `meta.TransformedTargetRegressor` to `meta.TargetTransformRegressor`.
15 | - Added `meta.TargetStandardScaler`.
16 | 
17 | ## preprocessing
18 | 
19 | - Added a `with_std` parameter to `StandardScaler`.
20 | 
21 | ## rules
22 | 
23 | - Added `rules.AMRules`
24 | 
25 | ## stats
26 | 
27 | - Make `stats.RollingQuantile` match the default behavior of Numpy's `quantile` function.
28 | 
29 | ## tree
30 | 
31 | - Unified base class structure applied to all tree models.
32 | - Bug fixes.
33 | - Added `tree.SGTClassifier` and `tree.SGTRegressor`.
34 | 


--------------------------------------------------------------------------------
/docs/releases/unreleased.md:
--------------------------------------------------------------------------------
1 | # Unreleased
2 | 
3 | ## base
4 | 
5 | - The `tags` and `more_tags` properties of `base.Estimator` are now both a set of strings.
6 | - The `base` module is now fully type-annotated. Some type hints have changed, but this does not impact the behaviour of the code. For instance, the regression target is now indicated as a float instead of a Number.
7 | - `base.Ensemble`, `base.Wrapper`, and `base.WrapperEnsemble` became generic with regard to the type they encapsulate.
8 | 


--------------------------------------------------------------------------------
/river/__init__.py:
--------------------------------------------------------------------------------
 1 | """River is a library for incremental learning. Incremental learning is a machine learning regime
 2 | where the observations are made available one by one. It is also known as online learning,
 3 | iterative learning, or sequential learning. This is in contrast to batch learning where all the
 4 | data is processed at once. Incremental learning is desirable when the data is too big to fit in
 5 | memory, or simply when it isn't available all at once. river's API is heavily inspired from that of
 6 | scikit-learn, enough so that users who are familiar with scikit-learn should feel right at home.
 7 | """
 8 | 
 9 | from __future__ import annotations
10 | 
11 | from .__version__ import __version__  # noqa: F401
12 | 


--------------------------------------------------------------------------------
/river/__version__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | __version__ = "0.22.0"
4 | 


--------------------------------------------------------------------------------
/river/active/__init__.py:
--------------------------------------------------------------------------------
1 | """Online active learning."""
2 | 
3 | from __future__ import annotations
4 | 
5 | from . import base
6 | from .entropy import EntropySampler
7 | 
8 | __all__ = ["base", "EntropySampler"]
9 | 


--------------------------------------------------------------------------------
/river/anomaly/__init__.py:
--------------------------------------------------------------------------------
 1 | """Anomaly detection.
 2 | 
 3 | Estimators in the `anomaly` module have a bespoke API. Each anomaly detector has a `score_one`
 4 | method instead of a `predict_one` method. This method returns an anomaly score. Normal observations
 5 | should have a low score, whereas anomalous observations should have a high score. The range of the
 6 | scores is relative to each estimator.
 7 | 
 8 | Anomaly detectors are usually unsupervised, in that they analyze the distribution of the features
 9 | they are shown. But River also has a notion of supervised anomaly detectors. These analyze the
10 | distribution of a target variable, and optionally include the distribution of the features as well. They are useful for detecting labelling anomalies, which can be detrimental if they learned by a
11 | model.
12 | 
13 | """
14 | 
15 | from __future__ import annotations
16 | 
17 | from . import base
18 | from .filter import QuantileFilter, ThresholdFilter
19 | from .gaussian import GaussianScorer
20 | from .hst import HalfSpaceTrees
21 | from .lof import LocalOutlierFactor
22 | from .pad import PredictiveAnomalyDetection
23 | from .sad import StandardAbsoluteDeviation
24 | from .svm import OneClassSVM
25 | 
26 | __all__ = [
27 |     "base",
28 |     "AnomalyDetector",
29 |     "GaussianScorer",
30 |     "HalfSpaceTrees",
31 |     "OneClassSVM",
32 |     "QuantileFilter",
33 |     "StandardAbsoluteDeviation",
34 |     "ThresholdFilter",
35 |     "LocalOutlierFactor",
36 |     "PredictiveAnomalyDetection",
37 | ]
38 | 


--------------------------------------------------------------------------------
/river/anomaly/test_hst.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | 
 4 | def test_missing_features():
 5 |     """Checks that HalfSpaceTrees works even if a feature is missing.
 6 | 
 7 |     >>> import random
 8 |     >>> from river import anomaly
 9 |     >>> from river import compose
10 |     >>> from river import datasets
11 |     >>> from river import metrics
12 |     >>> from river import preprocessing
13 | 
14 |     >>> model = compose.Pipeline(
15 |     ...     preprocessing.MinMaxScaler(),
16 |     ...     anomaly.HalfSpaceTrees(seed=42)
17 |     ... )
18 | 
19 |     >>> auc = metrics.ROCAUC()
20 | 
21 |     >>> features = list(next(iter(datasets.CreditCard()))[0].keys())
22 |     >>> random.seed(42)
23 | 
24 |     >>> for x, y in datasets.CreditCard().take(8000):
25 |     ...     del x[random.choice(features)]
26 |     ...     score = model.score_one(x)
27 |     ...     model.learn_one(x, y)
28 |     ...     auc.update(y, score)
29 | 
30 |     >>> auc
31 |     ROCAUC: 88.68%
32 | 
33 |     """
34 | 


--------------------------------------------------------------------------------
/river/anomaly/test_svm.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | 
 5 | import pytest
 6 | from sklearn import linear_model as sklm
 7 | 
 8 | from river import anomaly, datasets, optim
 9 | 
10 | tests = {
11 |     "Vanilla": (
12 |         {"optimizer": optim.SGD(1e-2), "nu": 0.5},
13 |         {"learning_rate": "constant", "eta0": 1e-2, "nu": 0.5},
14 |     ),
15 |     "No intercept": (
16 |         {"optimizer": optim.SGD(1e-2), "nu": 0.5, "intercept_lr": 0.0},
17 |         {"learning_rate": "constant", "eta0": 1e-2, "nu": 0.5, "fit_intercept": False},
18 |     ),
19 | }
20 | 
21 | 
22 | @pytest.mark.parametrize(
23 |     "river_params, sklearn_params",
24 |     tests.values(),
25 |     ids=tests.keys(),
26 | )
27 | def test_sklearn_coherence(river_params, sklearn_params):
28 |     """Checks that the sklearn and river implementations produce the same results."""
29 | 
30 |     rv = anomaly.OneClassSVM(**river_params)
31 |     sk = sklm.SGDOneClassSVM(**sklearn_params)
32 | 
33 |     for x, _ in datasets.Phishing().take(100):
34 |         rv.learn_one(x)
35 |         sk.partial_fit([list(x.values())])
36 | 
37 |     for i, w in enumerate(rv.weights.values()):
38 |         assert math.isclose(w, sk.coef_[i])
39 | 


--------------------------------------------------------------------------------
/river/api.py:
--------------------------------------------------------------------------------
 1 | """River API module."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from . import (
 6 |     active,
 7 |     anomaly,
 8 |     bandit,
 9 |     base,
10 |     cluster,
11 |     compat,
12 |     compose,
13 |     conf,
14 |     covariance,
15 |     datasets,
16 |     drift,
17 |     dummy,
18 |     ensemble,
19 |     evaluate,
20 |     facto,
21 |     feature_extraction,
22 |     feature_selection,
23 |     forest,
24 |     imblearn,
25 |     linear_model,
26 |     metrics,
27 |     misc,
28 |     model_selection,
29 |     multiclass,
30 |     multioutput,
31 |     naive_bayes,
32 |     neighbors,
33 |     neural_net,
34 |     optim,
35 |     preprocessing,
36 |     proba,
37 |     reco,
38 |     rules,
39 |     sketch,
40 |     stats,
41 |     stream,
42 |     time_series,
43 |     tree,
44 |     utils,
45 | )
46 | 
47 | __all__ = [
48 |     "active",
49 |     "anomaly",
50 |     "base",
51 |     "bandit",
52 |     "cluster",
53 |     "compat",
54 |     "compose",
55 |     "conf",
56 |     "covariance",
57 |     "datasets",
58 |     "dummy",
59 |     "drift",
60 |     "ensemble",
61 |     "evaluate",
62 |     "facto",
63 |     "feature_extraction",
64 |     "feature_selection",
65 |     "forest",
66 |     "imblearn",
67 |     "linear_model",
68 |     "metrics",
69 |     "misc",
70 |     "model_selection",
71 |     "multiclass",
72 |     "multioutput",
73 |     "naive_bayes",
74 |     "neighbors",
75 |     "neural_net",
76 |     "optim",
77 |     "preprocessing",
78 |     "proba",
79 |     "reco",
80 |     "rules",
81 |     "sketch",
82 |     "stats",
83 |     "stream",
84 |     "time_series",
85 |     "tree",
86 |     "utils",
87 | ]
88 | 


--------------------------------------------------------------------------------
/river/bandit/__init__.py:
--------------------------------------------------------------------------------
 1 | """Multi-armed bandit (MAB) policies.
 2 | 
 3 | The bandit policies in River have a generic API. This allows them to be used in a variety of
 4 | situations. For instance, they can be used for model selection
 5 | (see `model_selection.BanditRegressor`).
 6 | 
 7 | """
 8 | 
 9 | from __future__ import annotations
10 | 
11 | from . import base, datasets, envs
12 | from .bayes_ucb import BayesUCB
13 | from .epsilon_greedy import EpsilonGreedy
14 | from .evaluate import evaluate, evaluate_offline
15 | from .exp3 import Exp3
16 | from .lin_ucb import LinUCBDisjoint
17 | from .random import RandomPolicy
18 | from .thompson import ThompsonSampling
19 | from .ucb import UCB
20 | 
21 | __all__ = [
22 |     "base",
23 |     "datasets",
24 |     "envs",
25 |     "evaluate",
26 |     "evaluate_offline",
27 |     "BayesUCB",
28 |     "EpsilonGreedy",
29 |     "Exp3",
30 |     "LinUCBDisjoint",
31 |     "ThompsonSampling",
32 |     "UCB",
33 |     "RandomPolicy",
34 | ]
35 | 


--------------------------------------------------------------------------------
/river/bandit/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | from .base import BanditDataset
4 | from .news import NewsArticles
5 | 
6 | __all__ = ["BanditDataset", "NewsArticles"]
7 | 


--------------------------------------------------------------------------------
/river/bandit/datasets/base.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import abc
 4 | 
 5 | from river import bandit, datasets
 6 | 
 7 | 
 8 | class BanditDataset(datasets.base.Dataset):
 9 |     """Base class for bandit datasets.
10 | 
11 |     Parameters
12 |     ----------
13 |     n_features
14 |         Number of features in the dataset.
15 |     n_samples
16 |         Number of samples in the dataset.
17 |     n_classes
18 |         Number of classes in the dataset, only applies to classification datasets.
19 |     n_outputs
20 |         Number of outputs the target is made of, only applies to multi-output datasets.
21 |     sparse
22 |         Whether the dataset is sparse or not.
23 | 
24 |     """
25 | 
26 |     def __init__(
27 |         self,
28 |         n_features,
29 |         n_samples=None,
30 |         n_classes=None,
31 |         n_outputs=None,
32 |         sparse=False,
33 |     ):
34 |         super().__init__(
35 |             task="BANDIT",
36 |             n_features=n_features,
37 |             n_samples=n_samples,
38 |             n_classes=n_classes,
39 |             n_outputs=n_outputs,
40 |             sparse=sparse,
41 |         )
42 | 
43 |     @abc.abstractproperty
44 |     def arms(self) -> list[bandit.base.ArmID]:
45 |         """The list of arms that can be pulled."""
46 | 
47 |     @property
48 |     def _repr_content(self):
49 |         return {**super()._repr_content, "Arms": f"{len(self.arms):,d}"}
50 | 


--------------------------------------------------------------------------------
/river/bandit/envs/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | try:
 4 |     import gymnasium as gym
 5 | 
 6 |     GYM_INSTALLED = True
 7 | except ImportError:
 8 |     GYM_INSTALLED = False
 9 | 
10 | if GYM_INSTALLED:
11 |     from .candy_cane import CandyCaneContest
12 |     from .testbed import KArmedTestbed
13 | 
14 |     __all__ = ["CandyCaneContest", "KArmedTestbed"]
15 | 
16 |     RIVER_NAMESPACE = "river_bandits"
17 | 
18 |     if (env_id := f"{RIVER_NAMESPACE}/CandyCaneContest-v0") not in gym.envs.registration.registry:
19 |         gym.envs.registration.register(
20 |             id=env_id,
21 |             entry_point="river.bandit.envs:CandyCaneContest",
22 |             max_episode_steps=CandyCaneContest.n_steps,
23 |         )
24 |     if (env_id := f"{RIVER_NAMESPACE}/KArmedTestbed-v0") not in gym.envs.registration.registry:
25 |         gym.envs.registration.register(
26 |             id=env_id,
27 |             entry_point="river.bandit.envs:KArmedTestbed",
28 |             max_episode_steps=KArmedTestbed.n_steps,
29 |         )
30 | 


--------------------------------------------------------------------------------
/river/bandit/envs/testbed.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | 
 5 | import gymnasium as gym
 6 | 
 7 | 
 8 | class KArmedTestbed(gym.Env):
 9 |     """k-armed testbed.
10 | 
11 |     This is a simple environment that can be used to test bandit algorithms. It is based on the
12 |     10 armed testbed described in the book "Reinforcement Learning: An Introduction" by Sutton and
13 |     Barto.
14 | 
15 |     Parameters
16 |     ----------
17 |     k
18 |         Number of arms.
19 | 
20 |     """
21 | 
22 |     n_steps = 1000
23 | 
24 |     def __init__(self, k: int = 10):
25 |         super().__init__()
26 |         self.k = k
27 |         self.action_space = gym.spaces.Discrete(k)
28 |         self.observation_space = gym.spaces.Discrete(k)
29 |         self.reward_range = (-math.inf, math.inf)
30 | 
31 |     def reset(self, seed=None, options=None):
32 |         super().reset(seed=seed)
33 |         self._actual_rewards = self.np_random.normal(loc=0, scale=1, size=self.k).tolist()
34 |         self._best_arm = max(enumerate(self._actual_rewards), key=lambda x: x[1])[0]
35 |         observation = self._best_arm
36 |         info = {}
37 |         return observation, info
38 | 
39 |     def step(self, arm):
40 |         arm_reward = self._actual_rewards[arm]
41 |         reward = self.np_random.normal(loc=arm_reward, scale=1)
42 | 
43 |         observation = self._best_arm
44 |         info = {}
45 |         terminated = False
46 |         truncated = False
47 |         return observation, reward, terminated, truncated, info
48 | 


--------------------------------------------------------------------------------
/river/bandit/test_envs.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import gymnasium as gym
 4 | import gymnasium.utils.env_checker
 5 | import pytest
 6 | 
 7 | from river import bandit
 8 | 
 9 | 
10 | def _iter_envs():
11 |     for env_name in gym.envs.registry:
12 |         if env_name.startswith(bandit.envs.RIVER_NAMESPACE):
13 |             yield gym.make(env_name)
14 | 
15 | 
16 | @pytest.mark.parametrize(
17 |     "env",
18 |     [pytest.param(env, id=env.unwrapped.__class__.__name__) for env in _iter_envs()],
19 | )
20 | def test_gym_check_env(env):
21 |     gym.utils.env_checker.check_env(env.unwrapped)
22 | 


--------------------------------------------------------------------------------
/river/base/clusterer.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import abc
 4 | from typing import Any
 5 | 
 6 | from . import estimator, typing
 7 | 
 8 | 
 9 | class Clusterer(estimator.Estimator):
10 |     """A clustering model."""
11 | 
12 |     @property
13 |     def _supervised(self) -> bool:
14 |         return False
15 | 
16 |     @abc.abstractmethod
17 |     def learn_one(self, x: dict[typing.FeatureName, Any]) -> None:
18 |         """Update the model with a set of features `x`.
19 | 
20 |         Parameters
21 |         ----------
22 |         x
23 |             A dictionary of features.
24 | 
25 |         """
26 | 
27 |     @abc.abstractmethod
28 |     def predict_one(self, x: dict[typing.FeatureName, Any]) -> int:
29 |         """Predicts the cluster number for a set of features `x`.
30 | 
31 |         Parameters
32 |         ----------
33 |         x
34 |             A dictionary of features.
35 | 
36 |         Returns
37 |         -------
38 |         A cluster number.
39 | 
40 |         """
41 | 


--------------------------------------------------------------------------------
/river/base/ensemble.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from collections import UserList
 4 | from collections.abc import Iterator
 5 | from random import Random
 6 | from typing import TypeVar
 7 | 
 8 | from .estimator import Estimator
 9 | from .wrapper import Wrapper
10 | 
11 | T = TypeVar("T", bound=Estimator)
12 | 
13 | 
14 | class Ensemble(UserList[T]):
15 |     """An ensemble is a model which is composed of a list of models.
16 | 
17 |     Parameters
18 |     ----------
19 |     models
20 | 
21 |     """
22 | 
23 |     def __init__(self, models: Iterator[T]) -> None:
24 |         super().__init__(models)
25 | 
26 |         if len(self) < self._min_number_of_models:
27 |             raise ValueError(
28 |                 f"At least {self._min_number_of_models} models are expected, "
29 |                 + f"only {len(self)} were passed"
30 |             )
31 | 
32 |     @property
33 |     def _min_number_of_models(self) -> int:
34 |         return 2
35 | 
36 |     @property
37 |     def models(self) -> list[T]:
38 |         return self.data
39 | 
40 | 
41 | class WrapperEnsemble(Ensemble[T], Wrapper[T]):
42 |     """A wrapper ensemble is an ensemble composed of multiple copies of the same model.
43 | 
44 |     Parameters
45 |     ----------
46 |     model
47 |         The model to copy.
48 |     n_models
49 |         The number of copies to make.
50 |     seed
51 |         Random number generator seed for reproducibility.
52 | 
53 |     """
54 | 
55 |     def __init__(self, model: T, n_models: int, seed: int | None) -> None:
56 |         super().__init__(model.clone() for _ in range(n_models))
57 |         self.model = model
58 |         self.n_models = n_models
59 |         self.seed = seed
60 |         self._rng = Random(seed)
61 | 
62 |     @property
63 |     def _wrapped_model(self) -> T:
64 |         return self.model
65 | 


--------------------------------------------------------------------------------
/river/base/tags.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | TEXT_INPUT = "text input"
4 | POSITIVE_INPUT = "positive input"
5 | 


--------------------------------------------------------------------------------
/river/base/typing.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import typing
 4 | 
 5 | FeatureName = typing.Hashable
 6 | RegTarget = float
 7 | ClfTarget = typing.Union[bool, str, int]  # noqa: UP007
 8 | Target = typing.Union[ClfTarget, RegTarget]  # noqa: UP007
 9 | Dataset = typing.Iterable[typing.Tuple[dict[FeatureName, typing.Any], typing.Any]]  # noqa: UP006
10 | Stream = typing.Iterator[typing.Tuple[dict[FeatureName, typing.Any], typing.Any]]  # noqa: UP006
11 | 
12 | 
13 | # These classes aim to provide the first blocks towards using protocols.
14 | # They should be modified if needed.
15 | class Learner(typing.Protocol):
16 |     def learn_one(self, x: dict[FeatureName, typing.Any], y: Target) -> None: ...
17 | 
18 | 
19 | class Predictor(Learner, typing.Protocol):
20 |     def predict_one(self, x: dict[FeatureName, typing.Any]) -> Target: ...
21 | 


--------------------------------------------------------------------------------
/river/base/wrapper.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from abc import ABC, abstractmethod
 4 | from typing import Generic, TypeVar
 5 | 
 6 | from river import base
 7 | 
 8 | from .estimator import Estimator  # Prevent a circular import of module base
 9 | 
10 | T = TypeVar("T", bound=Estimator)
11 | 
12 | 
13 | class Wrapper(ABC, Generic[T]):
14 |     """A wrapper model."""
15 | 
16 |     @property
17 |     @abstractmethod
18 |     def _wrapped_model(self) -> T:
19 |         """Provides access to the wrapped model."""
20 | 
21 |     @property
22 |     def _labelloc(self) -> str:
23 |         """Indicates location of the wrapper name when drawing pipelines."""
24 |         return "t"  # for top
25 | 
26 |     def __str__(self) -> str:
27 |         return f"{type(self).__name__}({self._wrapped_model})"
28 | 
29 |     def _more_tags(self) -> set[str]:
30 |         return self._wrapped_model._tags
31 | 
32 |     @property
33 |     def _supervised(self) -> bool:
34 |         return self._wrapped_model._supervised
35 | 
36 |     @property
37 |     def _multiclass(self) -> bool:
38 |         return isinstance(self._wrapped_model, base.Classifier) and self._wrapped_model._multiclass
39 | 


--------------------------------------------------------------------------------
/river/checks/anomaly.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | 
 4 | def check_roc_auc(anomaly_detector, dataset):
 5 |     """The ROC AUC should always be above 50%."""
 6 | 
 7 |     from sklearn import metrics
 8 | 
 9 |     scores = []
10 |     labels = []
11 | 
12 |     for x, y in dataset:
13 |         anomaly_detector.learn_one(x)
14 |         y_pred = anomaly_detector.score_one(x)
15 | 
16 |         scores.append(y_pred)
17 |         labels.append(y)
18 | 
19 |     assert metrics.roc_auc_score(labels, scores) >= 0.5
20 | 


--------------------------------------------------------------------------------
/river/checks/clf.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import copy
 4 | import math
 5 | 
 6 | 
 7 | def check_predict_proba_one(classifier, dataset):
 8 |     """predict_proba_one should return a valid probability distribution and be pure."""
 9 | 
10 |     from river import utils
11 | 
12 |     if not hasattr(classifier, "predict_proba_one"):
13 |         return
14 | 
15 |     for x, y in dataset:
16 |         xx, yy = copy.deepcopy(x), copy.deepcopy(y)
17 | 
18 |         classifier.learn_one(x, y)
19 |         y_pred = classifier.predict_proba_one(x)
20 | 
21 |         if utils.inspect.isactivelearner(classifier):
22 |             y_pred, _ = y_pred
23 | 
24 |         # Check the probabilities are coherent
25 |         assert isinstance(y_pred, dict)
26 |         for proba in y_pred.values():
27 |             assert 0.0 <= proba <= 1.0
28 |         assert math.isclose(sum(y_pred.values()), 1.0)
29 | 
30 |         # Check predict_proba_one is pure (i.e. x and y haven't changed)
31 |         assert x == xx
32 |         assert y == yy
33 | 
34 | 
35 | def check_predict_proba_one_binary(classifier, dataset):
36 |     """predict_proba_one should return a dict with True and False keys."""
37 | 
38 |     for x, y in dataset:
39 |         y_pred = classifier.predict_proba_one(x)
40 |         classifier.learn_one(x, y)
41 |         assert set(y_pred.keys()) == {False, True}
42 | 
43 | 
44 | def check_multiclass_is_bool(model):
45 |     assert isinstance(model._multiclass, bool)
46 | 


--------------------------------------------------------------------------------
/river/checks/model_selection.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import itertools
 4 | 
 5 | 
 6 | def check_model_selection_order_does_not_matter(model, dataset):
 7 |     best_params = []
 8 |     permutations = list(itertools.permutations(model.models))
 9 |     datasets = itertools.tee(dataset, len(permutations))
10 | 
11 |     for permutation, dataset in zip(permutations, datasets):
12 |         models = [model.clone() for model in permutation]
13 |         clone = model.clone(new_params={"models": models})
14 |         for x, y in dataset:
15 |             clone.predict_one(x)
16 |             clone.learn_one(x, y)
17 |         best_params.append(clone.best_model._get_params())
18 | 
19 |     # Check that the best params are always the same
20 |     assert all(params == best_params[0] for params in best_params)
21 | 


--------------------------------------------------------------------------------
/river/checks/reco.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import random
 4 | 
 5 | 
 6 | def check_reco_routine(ranker):
 7 |     users = ["Tom", "Anna"]
 8 |     items = {"politics", "sports", "music", "food", "finance", "health", "camping"}
 9 | 
10 |     def get_reward(user, item) -> bool:
11 |         if user == "Tom":
12 |             return item in {"music", "politics"}
13 |         return item in {"politics", "sports"}
14 | 
15 |     for i in range(100):
16 |         user = random.choice(users)
17 |         item = ranker.rank(user, items)[0]
18 | 
19 |         clicked = get_reward(user, item)
20 | 
21 |         ranker.learn_one(user, item, clicked)
22 | 


--------------------------------------------------------------------------------
/river/checks/utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import inspect
 4 | import math
 5 | 
 6 | 
 7 | def assert_predictions_are_close(y1, y2):
 8 |     if isinstance(y1, dict):
 9 |         for k in y1:
10 |             assert_predictions_are_close(y1[k], y2[k])
11 |     elif isinstance(y1, float):
12 |         assert math.isclose(y1, y2, rel_tol=1e-06)
13 |     else:
14 |         assert y1 == y2
15 | 
16 | 
17 | def seed_params(params, seed):
18 |     """Looks for "seed" keys and sets the value."""
19 | 
20 |     def is_class_param(param):
21 |         return isinstance(param, tuple) and inspect.isclass(param[0]) and isinstance(param[1], dict)
22 | 
23 |     if is_class_param(params):
24 |         return params[0], seed_params(params[1], seed)
25 | 
26 |     if not isinstance(params, dict):
27 |         return params
28 | 
29 |     return {
30 |         name: seed if name == "seed" else seed_params(param, seed) for name, param in params.items()
31 |     }
32 | 


--------------------------------------------------------------------------------
/river/cluster/__init__.py:
--------------------------------------------------------------------------------
 1 | """Unsupervised clustering."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from .clustream import CluStream
 6 | from .dbstream import DBSTREAM
 7 | from .denstream import DenStream
 8 | from .k_means import KMeans
 9 | from .odac import ODAC
10 | from .streamkmeans import STREAMKMeans
11 | from .textclust import TextClust
12 | 
13 | __all__ = ["CluStream", "DBSTREAM", "DenStream", "KMeans", "ODAC", "STREAMKMeans", "TextClust"]
14 | 


--------------------------------------------------------------------------------
/river/compat/__init__.py:
--------------------------------------------------------------------------------
 1 | """Compatibility tools.
 2 | 
 3 | This module contains adapters for making River estimators compatible with other libraries, and
 4 | vice-versa whenever possible. The relevant adapters will only be usable if you have installed the
 5 | necessary library. For instance, you have to install scikit-learn in order to use the
 6 | `compat.convert_sklearn_to_river` function.
 7 | 
 8 | """
 9 | 
10 | from __future__ import annotations
11 | 
12 | __all__: list[str] = []
13 | 
14 | try:
15 |     from .river_to_sklearn import (
16 |         River2SKLClassifier,
17 |         River2SKLClusterer,
18 |         River2SKLRegressor,
19 |         River2SKLTransformer,
20 |         convert_river_to_sklearn,
21 |     )
22 |     from .sklearn_to_river import SKL2RiverClassifier, SKL2RiverRegressor, convert_sklearn_to_river
23 | 
24 |     __all__ += [
25 |         "convert_river_to_sklearn",
26 |         "convert_sklearn_to_river",
27 |         "River2SKLRegressor",
28 |         "River2SKLClassifier",
29 |         "River2SKLClusterer",
30 |         "River2SKLTransformer",
31 |         "SKL2RiverClassifier",
32 |         "SKL2RiverRegressor",
33 |     ]
34 | except ModuleNotFoundError:
35 |     pass
36 | 


--------------------------------------------------------------------------------
/river/compose/__init__.py:
--------------------------------------------------------------------------------
 1 | """Model composition.
 2 | 
 3 | This module contains utilities for merging multiple modeling steps into a single pipeline. Although
 4 | pipelines are not the only way to process a stream of data, we highly encourage you to use them.
 5 | 
 6 | """
 7 | 
 8 | from __future__ import annotations
 9 | 
10 | from .func import FuncTransformer
11 | from .grouper import Grouper
12 | from .pipeline import Pipeline, learn_during_predict
13 | from .product import TransformerProduct
14 | from .renamer import Prefixer, Renamer, Suffixer
15 | from .select import Discard, Select, SelectType
16 | from .target_transform import TargetTransformRegressor
17 | from .union import TransformerUnion
18 | 
19 | __all__ = [
20 |     "Discard",
21 |     "FuncTransformer",
22 |     "Grouper",
23 |     "Pipeline",
24 |     "Prefixer",
25 |     "pure_inference_mode",
26 |     "Renamer",
27 |     "Select",
28 |     "SelectType",
29 |     "Suffixer",
30 |     "TargetTransformRegressor",
31 |     "TransformerProduct",
32 |     "TransformerUnion",
33 |     "learn_during_predict",
34 | ]
35 | 


--------------------------------------------------------------------------------
/river/compose/grouper.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import collections
 4 | import copy
 5 | import functools
 6 | 
 7 | from river import base
 8 | 
 9 | __all__ = ["Grouper"]
10 | 
11 | 
12 | class Grouper(base.Transformer):
13 |     """Applies a transformer within different groups.
14 | 
15 |     This transformer allows you to split your data into groups and apply a transformer within each
16 |     group. This happens in a streaming manner, which means that the groups are discovered online.
17 |     A separate copy of the provided transformer is made whenever a new group appears. The groups
18 |     are defined according to one or more keys.
19 | 
20 |     Parameters
21 |     ----------
22 |     transformer
23 |     by
24 |         The field on which to group the data. This can either by a single value, or a list of
25 |         values.
26 | 
27 |     """
28 | 
29 |     def __init__(
30 |         self,
31 |         transformer: base.BaseTransformer,
32 |         by: base.typing.FeatureName | list[base.typing.FeatureName],
33 |     ):
34 |         self.transformer = transformer
35 |         self.by = by if isinstance(by, list) else [by]
36 |         self.transformers: collections.defaultdict = collections.defaultdict(
37 |             functools.partial(copy.deepcopy, transformer)
38 |         )
39 | 
40 |     def _get_key(self, x):
41 |         return "_".join(str(x[k]) for k in self.by)
42 | 
43 |     def learn_one(self, x):
44 |         key = self._get_key(x)
45 |         self.transformers[key].learn_one(x)
46 | 
47 |     def transform_one(self, x):
48 |         key = self._get_key(x)
49 |         return self.transformers[key].transform_one(x)
50 | 


--------------------------------------------------------------------------------
/river/conf/__init__.py:
--------------------------------------------------------------------------------
 1 | """Conformal predictions. This modules contains wrappers to enable conformal predictions on any
 2 | regressor or classifier."""
 3 | 
 4 | from __future__ import annotations
 5 | 
 6 | from .interval import Interval
 7 | from .jackknife import RegressionJackknife
 8 | 
 9 | __all__ = [
10 |     "Interval",
11 |     "RegressionJackknife",
12 | ]
13 | 


--------------------------------------------------------------------------------
/river/conf/interval.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import dataclasses
 4 | 
 5 | 
 6 | @dataclasses.dataclass
 7 | class Interval:
 8 |     """An object to represent a (prediction) interval.
 9 | 
10 |     Users are not expected to use this class as-is. Instead, they should use the `with_interval`
11 |     parameter of the `predict_one` method of any regressor or classifier wrapped with a conformal
12 |     prediction method.
13 | 
14 |     Parameters
15 |     ----------
16 |     lower
17 |         The lower bound of the interval.
18 |     upper
19 |         The upper bound of the interval.
20 | 
21 |     """
22 | 
23 |     lower: float
24 |     upper: float
25 | 
26 |     @property
27 |     def center(self):
28 |         """The center of the interval."""
29 |         return (self.lower + self.upper) / 2
30 | 
31 |     @property
32 |     def width(self):
33 |         """The width of the interval."""
34 |         return self.upper - self.lower
35 | 
36 |     def __contains__(self, x):
37 |         return self.lower <= x <= self.upper
38 | 


--------------------------------------------------------------------------------
/river/conftest.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | collect_ignore = []
 4 | 
 5 | try:
 6 |     import sklearn  # noqa: F401
 7 | except ImportError:
 8 |     collect_ignore.append("compat/test_sklearn.py")
 9 | 
10 | try:
11 |     import sqlalchemy  # noqa: F401
12 | except ImportError:
13 |     collect_ignore.append("stream/iter_sql.py")
14 |     collect_ignore.append("stream/test_sql.py")
15 | 
16 | try:
17 |     import vaex  # noqa: F401
18 | except ImportError:
19 |     collect_ignore.append("stream/iter_vaex.py")
20 | 


--------------------------------------------------------------------------------
/river/covariance/__init__.py:
--------------------------------------------------------------------------------
1 | """Online estimation of covariance and precision matrices."""
2 | 
3 | from __future__ import annotations
4 | 
5 | from .emp import EmpiricalCovariance, EmpiricalPrecision
6 | 
7 | __all__ = ["EmpiricalCovariance", "EmpiricalPrecision"]
8 | 


--------------------------------------------------------------------------------
/river/datasets/airline_passengers.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class AirlinePassengers(base.FileDataset):
 9 |     """Monthly number of international airline passengers.
10 | 
11 |     The stream contains 144 items and only one single feature, which is the month. The goal is to
12 |     predict the number of passengers each month by capturing the trend and the seasonality of the
13 |     data.
14 | 
15 |     References
16 |     ----------
17 |     [^1]: [International airline passengers: monthly totals in thousands. Jan 49 – Dec 60](https://rdrr.io/r/datasets/AirPassengers.html)
18 | 
19 |     """
20 | 
21 |     def __init__(self):
22 |         super().__init__(
23 |             filename="airline-passengers.csv",
24 |             task=base.REG,
25 |             n_features=1,
26 |             n_samples=144,
27 |         )
28 | 
29 |     def __iter__(self):
30 |         return stream.iter_csv(
31 |             self.path,
32 |             target="passengers",
33 |             converters={"passengers": int},
34 |             parse_dates={"month": "%Y-%m"},
35 |         )
36 | 


--------------------------------------------------------------------------------
/river/datasets/banana.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/river/datasets/banana.zip


--------------------------------------------------------------------------------
/river/datasets/bananas.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class Bananas(base.FileDataset):
 9 |     """Bananas dataset.
10 | 
11 |     An artificial dataset where instances belongs to several clusters with a banana shape.
12 |     There are two attributes that correspond to the x and y axis, respectively.
13 | 
14 |     References
15 |     ----------
16 |     [^1]: [OpenML page](https://www.openml.org/d/1460)
17 | 
18 |     """
19 | 
20 |     def __init__(self):
21 |         super().__init__(filename="banana.zip", n_samples=5300, n_features=2, task=base.BINARY_CLF)
22 | 
23 |     def __iter__(self):
24 |         return stream.iter_libsvm(self.path, target_type=lambda x: x == "1")
25 | 


--------------------------------------------------------------------------------
/river/datasets/bikes.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class Bikes(base.RemoteDataset):
 9 |     """Bike sharing station information from the city of Toulouse.
10 | 
11 |     The goal is to predict the number of bikes in 5 different bike stations from the city of
12 |     Toulouse.
13 | 
14 |     References
15 |     ----------
16 |     [^1]: [A short introduction and conclusion to the OpenBikes 2016 Challenge](https://maxhalford.github.io/blog/openbikes-challenge/)
17 | 
18 |     """
19 | 
20 |     def __init__(self):
21 |         super().__init__(
22 |             url="https://maxhalford.github.io/files/datasets/toulouse_bikes.zip",
23 |             size=13_125_015,
24 |             n_samples=182_470,
25 |             n_features=8,
26 |             task=base.REG,
27 |             filename="toulouse_bikes.csv",
28 |         )
29 | 
30 |     def _iter(self):
31 |         return stream.iter_csv(
32 |             self.path,
33 |             target="bikes",
34 |             converters={
35 |                 "clouds": int,
36 |                 "humidity": int,
37 |                 "pressure": float,
38 |                 "temperature": float,
39 |                 "wind": float,
40 |                 "bikes": int,
41 |             },
42 |             parse_dates={"moment": "%Y-%m-%d %H:%M:%S"},
43 |         )
44 | 


--------------------------------------------------------------------------------
/river/datasets/chick_weights.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class ChickWeights(base.FileDataset):
 9 |     """Chick weights along time.
10 | 
11 |     The stream contains 578 items and 3 features. The goal is to predict the weight of each chick
12 |     along time, according to the diet the chick is on. The data is ordered by time and then by
13 |     chick.
14 | 
15 |     References
16 |     ----------
17 |     [^1]: [Chick weight dataset overview](http://rstudio-pubs-static.s3.amazonaws.com/107631_131ad1c022df4f90aa2d214a5c5609b2.html)
18 | 
19 |     """
20 | 
21 |     def __init__(self):
22 |         super().__init__(filename="chick-weights.csv", n_samples=578, n_features=3, task=base.REG)
23 | 
24 |     def __iter__(self):
25 |         return stream.iter_csv(
26 |             self.path,
27 |             target="weight",
28 |             converters={"time": int, "weight": int, "chick": int, "diet": int},
29 |         )
30 | 


--------------------------------------------------------------------------------
/river/datasets/http.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class HTTP(base.RemoteDataset):
 9 |     """HTTP dataset of the KDD 1999 cup.
10 | 
11 |     The goal is to predict whether or not an HTTP connection is anomalous or not. The dataset only
12 |     contains 2,211 (0.4%) positive labels.
13 | 
14 |     References
15 |     ----------
16 |     [^1]: [HTTP (KDDCUP99) dataset](http://odds.cs.stonybrook.edu/http-kddcup99-dataset/)
17 | 
18 |     """
19 | 
20 |     def __init__(self):
21 |         super().__init__(
22 |             n_samples=567_498,
23 |             n_features=3,
24 |             task=base.BINARY_CLF,
25 |             url="https://maxhalford.github.io/files/datasets/kdd99_http.zip",
26 |             size=32_400_738,
27 |             filename="kdd99_http.csv",
28 |         )
29 | 
30 |     def _iter(self):
31 |         converters = {
32 |             "duration": float,
33 |             "src_bytes": float,
34 |             "dst_bytes": float,
35 |             "service": int,
36 |         }
37 |         return stream.iter_csv(self.path, target="service", converters=converters)
38 | 


--------------------------------------------------------------------------------
/river/datasets/malicious_url.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import os
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class MaliciousURL(base.RemoteDataset):
 9 |     """Malicious URLs dataset.
10 | 
11 |     This dataset contains features about URLs that are classified as malicious or not.
12 | 
13 |     References
14 |     ----------
15 |     [^1]: [Detecting Malicious URLs](http://www.sysnet.ucsd.edu/projects/url/)
16 |     [^2]: [Identifying Suspicious URLs: An Application of Large-Scale Online Learning](http://cseweb.ucsd.edu/~jtma/papers/url-icml2009.pdf)
17 | 
18 |     """
19 | 
20 |     def __init__(self):
21 |         super().__init__(
22 |             n_samples=2_396_130,
23 |             n_features=3_231_961,
24 |             task=base.BINARY_CLF,
25 |             url="http://www.sysnet.ucsd.edu/projects/url/url_svmlight.tar.gz",
26 |             filename="url_svmlight",
27 |             size=2_210_273_352,
28 |             sparse=True,
29 |         )
30 | 
31 |     def _iter(self):
32 |         files = list(self.path.glob("Day*.svm"))
33 |         files.sort(key=lambda x: int(os.path.basename(x).split(".")[0][3:]))
34 | 
35 |         def parse_libsvm_feature(f):
36 |             k, v = f.split(":")
37 |             return int(k), float(v)
38 | 
39 |         # There are 150 files with each one corresponding to a day
40 |         for file in files:
41 |             with open(file) as f:
42 |                 for line in f:
43 |                     # Each file has the libsvm format
44 |                     elements = line.rstrip().split(" ")
45 |                     y = elements.pop(0) == "+1"
46 |                     x = dict(parse_libsvm_feature(f) for f in elements)
47 |                     yield x, y
48 | 


--------------------------------------------------------------------------------
/river/datasets/phishing.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/river/datasets/phishing.csv.gz


--------------------------------------------------------------------------------
/river/datasets/phishing.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class Phishing(base.FileDataset):
 9 |     """Phishing websites.
10 | 
11 |     This dataset contains features from web pages that are classified as phishing or not.
12 | 
13 |     References
14 |     ----------
15 |     [^1]: [UCI page](http://archive.ics.uci.edu/ml/datasets/Website+Phishing)
16 | 
17 |     """
18 | 
19 |     def __init__(self) -> None:
20 |         super().__init__(
21 |             n_samples=1_250,
22 |             n_features=9,
23 |             task=base.BINARY_CLF,
24 |             filename="phishing.csv.gz",
25 |         )
26 | 
27 |     def __iter__(self):
28 |         return stream.iter_csv(
29 |             self.path,
30 |             target="is_phishing",
31 |             converters={
32 |                 "empty_server_form_handler": float,
33 |                 "popup_window": float,
34 |                 "https": float,
35 |                 "request_from_other_domain": float,
36 |                 "anchor_from_other_domain": float,
37 |                 "is_popular": float,
38 |                 "long_url": float,
39 |                 "age_of_domain": int,
40 |                 "ip_in_url": int,
41 |                 "is_phishing": lambda x: x == "1",
42 |             },
43 |         )
44 | 


--------------------------------------------------------------------------------
/river/datasets/restaurants.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import ast
 4 | 
 5 | from river import stream
 6 | 
 7 | from . import base
 8 | 
 9 | 
10 | class Restaurants(base.RemoteDataset):
11 |     """Data from the Kaggle Recruit Restaurants challenge.
12 | 
13 |     The goal is to predict the number of visitors in each of 829 Japanese restaurants over a period
14 |     of roughly 16 weeks. The data is ordered by date and then by restaurant ID.
15 | 
16 |     References
17 |     ----------
18 |     [^1]: [Recruit Restaurant Visitor Forecasting](https://www.kaggle.com/c/recruit-restaurant-visitor-forecasting)
19 | 
20 |     """
21 | 
22 |     def __init__(self):
23 |         super().__init__(
24 |             n_samples=252_108,
25 |             n_features=7,
26 |             task=base.REG,
27 |             url="https://maxhalford.github.io/files/datasets/kaggle_recruit_restaurants.zip",
28 |             size=28_881_242,
29 |             filename="kaggle_recruit_restaurants.csv",
30 |         )
31 | 
32 |     def _iter(self):
33 |         return stream.iter_csv(
34 |             self.path,
35 |             target="visitors",
36 |             converters={
37 |                 "latitude": float,
38 |                 "longitude": float,
39 |                 "visitors": int,
40 |                 "is_holiday": ast.literal_eval,
41 |             },
42 |             parse_dates={"date": "%Y-%m-%d"},
43 |         )
44 | 


--------------------------------------------------------------------------------
/river/datasets/segment.csv.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/river/datasets/segment.csv.zip


--------------------------------------------------------------------------------
/river/datasets/segment.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class ImageSegments(base.FileDataset):
 9 |     """Image segments classification.
10 | 
11 |     This dataset contains features that describe image segments into 7 classes: brickface, sky,
12 |     foliage, cement, window, path, and grass.
13 | 
14 |     References
15 |     ----------
16 |     [^1]: [UCI page](https://archive.ics.uci.edu/ml/datasets/Statlog+(Image+Segmentation))
17 | 
18 |     """
19 | 
20 |     def __init__(self):
21 |         super().__init__(
22 |             n_samples=2_310,
23 |             n_classes=7,
24 |             n_features=18,
25 |             task=base.MULTI_CLF,
26 |             filename="segment.csv.zip",
27 |         )
28 | 
29 |     def __iter__(self):
30 |         return stream.iter_csv(
31 |             self.path,
32 |             target="category",
33 |             converters={
34 |                 "region-centroid-col": int,
35 |                 "region-centroid-row": int,
36 |                 "short-line-density-5": float,
37 |                 "short-line-density-2": float,
38 |                 "vedge-mean": float,
39 |                 "vegde-sd": float,
40 |                 "hedge-mean": float,
41 |                 "hedge-sd": float,
42 |                 "intensity-mean": float,
43 |                 "rawred-mean": float,
44 |                 "rawblue-mean": float,
45 |                 "rawgreen-mean": float,
46 |                 "exred-mean": float,
47 |                 "exblue-mean": float,
48 |                 "exgreen-mean": float,
49 |                 "value-mean": float,
50 |                 "saturation-mean": float,
51 |                 "hue-mean": float,
52 |             },
53 |         )
54 | 


--------------------------------------------------------------------------------
/river/datasets/sms_spam.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from . import base
 4 | 
 5 | 
 6 | class SMSSpam(base.RemoteDataset):
 7 |     """SMS Spam Collection dataset.
 8 | 
 9 |     The data contains 5,574 items and 1 feature (i.e. SMS body). Spam messages represent
10 |     13.4% of the dataset. The goal is to predict whether an SMS is a spam or not.
11 | 
12 |     References
13 |     ----------
14 |     [^1]: [Almeida, T.A., Hidalgo, J.M.G. and Yamakami, A., 2011, September. Contributions to the study of SMS spam filtering: new collection and results. In Proceedings of the 11th ACM symposium on Document engineering (pp. 259-262).](http://www.dt.fee.unicamp.br/~tiago/smsspamcollection/doceng11.pdf)
15 | 
16 |     """
17 | 
18 |     def __init__(self):
19 |         super().__init__(
20 |             n_samples=5_574,
21 |             n_features=1,
22 |             task=base.BINARY_CLF,
23 |             url="https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip",
24 |             size=477_907,
25 |             filename="SMSSpamCollection",
26 |         )
27 | 
28 |     def _iter(self):
29 |         with open(self.path) as f:
30 |             for row in f:
31 |                 label, body = row.split("\t")
32 |                 yield ({"body": body}, label == "spam")
33 | 


--------------------------------------------------------------------------------
/river/datasets/smtp.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class SMTP(base.RemoteDataset):
 9 |     """SMTP dataset from the KDD 1999 cup.
10 | 
11 |     The goal is to predict whether or not an SMTP connection is anomalous or not. The dataset only
12 |     contains 2,211 (0.4%) positive labels.
13 | 
14 |     References
15 |     ----------
16 |     [^1]: [SMTP (KDDCUP99) dataset](http://odds.cs.stonybrook.edu/smtp-kddcup99-dataset/)
17 | 
18 |     """
19 | 
20 |     def __init__(self):
21 |         super().__init__(
22 |             n_samples=95_156,
23 |             n_features=3,
24 |             task=base.BINARY_CLF,
25 |             url="https://maxhalford.github.io/files/datasets/smtp.zip",
26 |             size=5_484_982,
27 |             filename="smtp.csv",
28 |         )
29 | 
30 |     def _iter(self):
31 |         return stream.iter_csv(
32 |             self.path,
33 |             target="service",
34 |             converters={
35 |                 "duration": float,
36 |                 "src_bytes": float,
37 |                 "dst_bytes": float,
38 |                 "service": int,
39 |             },
40 |         )
41 | 


--------------------------------------------------------------------------------
/river/datasets/solar-flare.csv.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/river/datasets/solar-flare.csv.zip


--------------------------------------------------------------------------------
/river/datasets/solar_flare.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class SolarFlare(base.FileDataset):
 9 |     """Solar flare multi-output regression.
10 | 
11 |     References
12 |     ----------
13 |     [^1]: [UCI page](https://archive.ics.uci.edu/ml/datasets/Solar+Flare)
14 | 
15 |     """
16 | 
17 |     def __init__(self):
18 |         super().__init__(
19 |             n_samples=1_066,
20 |             n_features=10,
21 |             n_outputs=3,
22 |             task=base.MO_REG,
23 |             filename="solar-flare.csv.zip",
24 |         )
25 | 
26 |     def __iter__(self):
27 |         return stream.iter_csv(
28 |             self.path,
29 |             target=["c-class-flares", "m-class-flares", "x-class-flares"],
30 |             converters={
31 |                 "zurich-class": str,
32 |                 "largest-spot-size": str,
33 |                 "spot-distribution": str,
34 |                 "activity": int,
35 |                 "evolution": int,
36 |                 "previous-24h-flare-activity": int,
37 |                 "hist-complex": int,
38 |                 "hist-complex-this-pass": int,
39 |                 "area": int,
40 |                 "largest-spot-area": int,
41 |                 "c-class-flares": int,
42 |                 "m-class-flares": int,
43 |                 "x-class-flares": int,
44 |             },
45 |         )
46 | 


--------------------------------------------------------------------------------
/river/datasets/taxis.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class Taxis(base.RemoteDataset):
 9 |     """Taxi ride durations in New York City.
10 | 
11 |     The goal is to predict the duration of taxi rides in New York City.
12 | 
13 |     References
14 |     ----------
15 |     [^1]: [New York City Taxi Trip Duration competition on Kaggle](https://www.kaggle.com/c/nyc-taxi-trip-duration)
16 | 
17 |     """
18 | 
19 |     def __init__(self):
20 |         super().__init__(
21 |             n_samples=1_458_644,
22 |             n_features=8,
23 |             task=base.REG,
24 |             url="https://maxhalford.github.io/files/datasets/nyc_taxis.zip",
25 |             size=195_271_696,
26 |             filename="train.csv",
27 |         )
28 | 
29 |     def _iter(self):
30 |         return stream.iter_csv(
31 |             self.path,
32 |             target="trip_duration",
33 |             converters={
34 |                 "passenger_count": int,
35 |                 "pickup_longitude": float,
36 |                 "pickup_latitude": float,
37 |                 "dropoff_longitude": float,
38 |                 "dropoff_latitude": float,
39 |                 "trip_duration": int,
40 |             },
41 |             parse_dates={"pickup_datetime": "%Y-%m-%d %H:%M:%S"},
42 |             drop=["dropoff_datetime", "id"],
43 |         )
44 | 


--------------------------------------------------------------------------------
/river/datasets/trec07.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class TREC07(base.RemoteDataset):
 9 |     """TREC's 2007 Spam Track dataset.
10 | 
11 |     The data contains 75,419 chronologically ordered items, i.e. 3 months of emails delivered
12 |     to a particular server in 2007. Spam messages represent 66.6% of the dataset.
13 |     The goal is to predict whether an email is a spam or not.
14 | 
15 |     The available raw features are: sender, recipients, date, subject, body.
16 | 
17 |     References
18 |     ----------
19 |     [^1]: [TREC 2007 Spam Track Overview](https://trec.nist.gov/pubs/trec16/papers/SPAM.OVERVIEW16.pdf)
20 |     [^2]: [Code ran to parse the dataset](https://gist.github.com/gbolmier/b6a942699aaaedec54041a32e4f34d40)
21 | 
22 |     """
23 | 
24 |     def __init__(self):
25 |         super().__init__(
26 |             n_samples=75_419,
27 |             n_features=5,
28 |             task=base.BINARY_CLF,
29 |             url="https://maxhalford.github.io/files/datasets/trec07p.zip",
30 |             size=144_504_829,
31 |             filename="trec07p.csv",
32 |         )
33 | 
34 |     def _iter(self):
35 |         return stream.iter_csv(
36 |             self.path,
37 |             target="y",
38 |             delimiter=",",
39 |             quotechar='"',
40 |             field_size_limit=1_000_000,
41 |         )
42 | 


--------------------------------------------------------------------------------
/river/datasets/trump_approval.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/river/datasets/trump_approval.csv.gz


--------------------------------------------------------------------------------
/river/datasets/trump_approval.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class TrumpApproval(base.FileDataset):
 9 |     """Donald Trump approval ratings.
10 | 
11 |     This dataset was obtained by reshaping the data used by FiveThirtyEight for analyzing Donald
12 |     Trump's approval ratings. It contains 5 features, which are approval ratings collected by
13 |     5 polling agencies. The target is the approval rating from FiveThirtyEight's model. The goal of
14 |     this task is to see if we can reproduce FiveThirtyEight's model.
15 | 
16 |     References
17 |     ----------
18 |     [^1]: [Trump Approval Ratings](https://projects.fivethirtyeight.com/trump-approval-ratings/)
19 | 
20 |     """
21 | 
22 |     def __init__(self):
23 |         super().__init__(
24 |             n_samples=1_001,
25 |             n_features=6,
26 |             task=base.REG,
27 |             filename="trump_approval.csv.gz",
28 |         )
29 | 
30 |     def __iter__(self):
31 |         return stream.iter_csv(
32 |             self.path,
33 |             target="five_thirty_eight",
34 |             converters={
35 |                 "ordinal_date": int,
36 |                 "gallup": float,
37 |                 "ipsos": float,
38 |                 "morning_consult": float,
39 |                 "rasmussen": float,
40 |                 "you_gov": float,
41 |                 "five_thirty_eight": float,
42 |             },
43 |         )
44 | 


--------------------------------------------------------------------------------
/river/datasets/water_flow.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stream
 4 | 
 5 | from . import base
 6 | 
 7 | 
 8 | class WaterFlow(base.FileDataset):
 9 |     """Water flow through a pipeline branch.
10 | 
11 |     The series includes hourly values for about 2 months, March 2022 to May 2022. The values are
12 |     expressed in liters per second. There are four anomalous segments in the series:
13 | 
14 |     * 3 "low value moments": this is due to water losses or human intervention for maintenance
15 |     * A small peak in the water inflow after the first 2 segments: this is due to a pumping
16 |         operation into the main pipeline, when more water pressure is needed
17 | 
18 |     This dataset is well suited for time series forecasting models, as well as anomaly detection
19 |     methods. Ideally, the goal is to build a time series forecasting model that is robust to the
20 |     anomalous segments.
21 | 
22 |     This data has been kindly donated by the Tecnojest s.r.l. company (www.invidea.it) from Italy.
23 | 
24 |     """
25 | 
26 |     def __init__(self):
27 |         super().__init__(
28 |             filename="water-flow.csv",
29 |             task=base.REG,
30 |             n_features=1,
31 |             n_samples=1_268,
32 |         )
33 | 
34 |     def __iter__(self):
35 |         return stream.iter_csv(
36 |             self.path,
37 |             target="Water flow [l/s]",
38 |             converters={"Water flow [l/s]": float},
39 |             parse_dates={"Time": "%Y-%m-%dT%H:%M:%S%z"},
40 |         )
41 | 


--------------------------------------------------------------------------------
/river/drift/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Concept Drift Detection.
 3 | 
 4 | This module contains concept drift detection methods. The purpose of a drift detector is to raise
 5 | an alarm if the data distribution changes. A good drift detector method is the one that maximizes
 6 | the true positives while keeping the number of false positives to a minimum.
 7 | 
 8 | """
 9 | 
10 | from __future__ import annotations
11 | 
12 | from . import binary, datasets
13 | from .adwin import ADWIN
14 | from .dummy import DummyDriftDetector
15 | from .kswin import KSWIN
16 | from .no_drift import NoDrift
17 | from .page_hinkley import PageHinkley
18 | from .retrain import DriftRetrainingClassifier
19 | 
20 | __all__ = [
21 |     "binary",
22 |     "datasets",
23 |     "ADWIN",
24 |     "DriftRetrainingClassifier",
25 |     "DummyDriftDetector",
26 |     "KSWIN",
27 |     "NoDrift",
28 |     "PageHinkley",
29 |     "PeriodicTrigger",
30 | ]
31 | 


--------------------------------------------------------------------------------
/river/drift/adwin_c.pyi:
--------------------------------------------------------------------------------
 1 | class AdaptiveWindowing:
 2 |     def __init__(
 3 |         self,
 4 |         delta: float = 0.002,
 5 |         clock: int = 32,
 6 |         max_buckets: int = 5,
 7 |         min_window_length: int = 5,
 8 |         grace_period: int = 10,
 9 |     ) -> None: ...
10 |     def get_n_detections(self) -> int: ...
11 |     def get_width(self) -> float: ...
12 |     def get_total(self) -> float: ...
13 |     def get_variance(self) -> float: ...
14 |     @property
15 |     def variance_in_window(self) -> float: ...
16 |     def update(self, value: float) -> bool: ...
17 | 
18 | class Bucket:
19 |     def __init__(self, max_size: int) -> None: ...
20 |     def clear_at(self, index: int) -> None: ...
21 |     def insert_data(self, value: float, variance: float) -> None: ...
22 |     def remove(self) -> None: ...
23 |     def compress(self, n_elements: int) -> None: ...
24 |     def get_total_at(self, index: int) -> float: ...
25 |     def get_variance_at(self, index: int) -> float: ...
26 |     def set_total_at(self, value: float, index: int) -> None: ...
27 |     def set_variance_at(self, value: float, index: int) -> None: ...
28 | 


--------------------------------------------------------------------------------
/river/drift/binary/__init__.py:
--------------------------------------------------------------------------------
 1 | """Drift detection for binary data."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from .ddm import DDM
 6 | from .eddm import EDDM
 7 | from .fhddm import FHDDM
 8 | from .hddm_a import HDDM_A
 9 | from .hddm_w import HDDM_W
10 | 
11 | __all__ = ["DDM", "EDDM", "FHDDM", "HDDM_A", "HDDM_W"]
12 | 


--------------------------------------------------------------------------------
/river/drift/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from .airline_passengers import AirlinePassengers
 4 | from .apple import Apple
 5 | from .bitcoin import Bitcoin
 6 | from .brent_crude_oil import BrentSpotPrice
 7 | from .occupancy import Occupancy
 8 | from .run_log import RunLog
 9 | from .uk_coal_employment import UKCoalEmploy
10 | 
11 | __all__ = [
12 |     "Bitcoin",
13 |     "BrentSpotPrice",
14 |     "UKCoalEmploy",
15 |     "AirlinePassengers",
16 |     "RunLog",
17 |     "Occupancy",
18 |     "Apple",
19 | ]
20 | 


--------------------------------------------------------------------------------
/river/drift/datasets/airline_passengers.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import datasets, stream
 4 | 
 5 | from .base import ChangePointFileDataset
 6 | 
 7 | 
 8 | class AirlinePassengers(ChangePointFileDataset):
 9 |     """JFK Airline Passengers
10 | 
11 |     This dataset gives the number of passengers arriving and departing at JFK.
12 |     The data is obtained from New York State's official Kaggle page for this dataset.
13 | 
14 |     References
15 |     ----------
16 |     [^1]: https://www.kaggle.com/new-york-state/nys-air-passenger-traffic,-port-authority-of-ny-nj#air-passenger-traffic-per-month-port-authority-of-ny-nj-beginning-1977.csv
17 | 
18 |     """
19 | 
20 |     def __init__(self):
21 |         super().__init__(
22 |             annotations={"6": [299], "7": [], "8": [302], "9": [326, 382], "10": [296]},
23 |             filename="airline_passengers.csv",
24 |             task=datasets.base.REG,
25 |             n_samples=468,
26 |             n_features=1,
27 |         )
28 | 
29 |     def __iter__(self):
30 |         return stream.iter_csv(
31 |             self.path,
32 |             target="Total Passengers",
33 |             converters={
34 |                 "Total Passengers": int,
35 |             },
36 |             parse_dates={"date": "%Y-%b"},
37 |         )
38 | 


--------------------------------------------------------------------------------
/river/drift/datasets/apple.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import datasets, stream
 4 | 
 5 | from .base import ChangePointFileDataset
 6 | 
 7 | 
 8 | class Apple(ChangePointFileDataset):
 9 |     """Apple Stock
10 | 
11 |     This dataset concerns the daily close price and volume of Apple stock around the year 2000. The dataset is sampled every 3 observations to reduce the length of the time series.
12 |     This dataset is retrieved from Yahoo Finance.
13 | 
14 |     References
15 |     ----------
16 |     [^1]: https://finance.yahoo.com/quote/AAPL/history?period1=850348800&period2=1084579200&interval=1d&filter=history&frequency=1d
17 | 
18 |     """
19 | 
20 |     def __init__(self):
21 |         super().__init__(
22 |             annotations={
23 |                 "6": [319],
24 |                 "7": [319],
25 |                 "8": [319],
26 |                 "9": [53, 90, 197, 276, 319, 403, 463, 535],
27 |                 "10": [319],
28 |             },
29 |             filename="apple.csv",
30 |             task=datasets.base.REG,
31 |             n_samples=1867,
32 |             n_features=6,
33 |         )
34 | 
35 |     def __iter__(self):
36 |         return stream.iter_csv(
37 |             self.path,
38 |             target=["Open", "High", "Low", "Close", "Adj Close", "Volume"],
39 |             converters={
40 |                 "Open": float,
41 |                 "High": float,
42 |                 "Low": float,
43 |                 "Close": float,
44 |                 "Adj Close": float,
45 |                 "Volume": float,
46 |             },
47 |             parse_dates={"Date": "%Y-%m-%d"},
48 |         )
49 | 


--------------------------------------------------------------------------------
/river/drift/datasets/bitcoin.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import datasets, stream
 4 | 
 5 | from .base import ChangePointFileDataset
 6 | 
 7 | 
 8 | class Bitcoin(ChangePointFileDataset):
 9 |     """Bitcoin Market Price
10 | 
11 |     This is a regression task, where the goal is to predict the average USD market price across
12 |     major bitcoin exchanges. This data was collected from the official Blockchain website. There
13 |     is only one feature given, the day of exchange, which is in increments of three. The first
14 |     500 lines have been removed because they are not interesting.
15 | 
16 |     References
17 |     ----------
18 |     [^1]: https://www.blockchain.com/fr/explorer/charts/market-price?timespan=all
19 | 
20 |     """
21 | 
22 |     def __init__(self):
23 |         super().__init__(
24 |             annotations={
25 |                 "6": [502, 580, 702, 747],
26 |                 "8": [583],
27 |                 "12": [597],
28 |                 "13": [522, 579, 591, 629, 703, 747, 760],
29 |                 "14": [93, 522, 540, 701, 747, 760, 772],
30 |             },
31 |             filename="bitcoin.csv",
32 |             task=datasets.base.REG,
33 |             n_samples=822,
34 |             n_features=1,
35 |         )
36 | 
37 |     def __iter__(self):
38 |         return stream.iter_csv(
39 |             self.path,
40 |             target="price",
41 |             converters={
42 |                 "price": float,
43 |             },
44 |             parse_dates={"date": "%Y-%m-%d"},
45 |         )
46 | 


--------------------------------------------------------------------------------
/river/drift/datasets/occupancy.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import datasets, stream
 4 | 
 5 | from .base import ChangePointFileDataset
 6 | 
 7 | 
 8 | class Occupancy(ChangePointFileDataset):
 9 |     """Room occupancy data.
10 | 
11 |     Dataset on detecting room occupancy based on several variables. The dataset contains
12 |     temperature, humidity, light, and CO2 variables.
13 | 
14 |     The data is sampled at every 16 observations to reduce the length of the series.
15 | 
16 |     References
17 |     ----------
18 |     Candanedo, Luis M., and Véronique Feldheim. "Accurate occupancy detection of an office room from light, temperature, humidity and CO2 measurements using statistical learning models." Energy and Buildings 112 (2016): 28-39.
19 | 
20 |     """
21 | 
22 |     def __init__(self):
23 |         super().__init__(
24 |             annotations={
25 |                 "6": [238, 416],
26 |                 "8": [53, 143, 238, 417],
27 |                 "9": [53, 92, 142, 181, 236, 264, 341, 416, 436, 451, 506],
28 |                 "10": [1, 52, 91, 142, 181, 234, 267, 324, 360, 416, 451, 506],
29 |                 "12": [234, 415],
30 |             },
31 |             filename="occupancy.csv",
32 |             task=datasets.base.REG,
33 |             n_samples=509,
34 |             n_features=4,
35 |         )
36 | 
37 |     def __iter__(self):
38 |         return stream.iter_csv(
39 |             self.path,
40 |             target=["V1", "V2", "V3", "V4"],
41 |             converters={
42 |                 "V1": float,
43 |                 "V2": float,
44 |                 "V3": float,
45 |                 "V4": float,
46 |             },
47 |             parse_dates={"time": "%Y-%m-%d %H:%M:%S"},
48 |         )
49 | 


--------------------------------------------------------------------------------
/river/drift/datasets/run_log.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import datasets, stream
 4 | 
 5 | from .base import ChangePointFileDataset
 6 | 
 7 | 
 8 | class RunLog(ChangePointFileDataset):
 9 |     """Interval Training Running Pace.
10 | 
11 |     This dataset shows the pace of a runner during an interval training session, where a mobile
12 |     application provides instructions on when to run and when to walk.
13 | 
14 |     """
15 | 
16 |     def __init__(self):
17 |         super().__init__(
18 |             annotations={
19 |                 "6": [60, 96, 114, 174, 204, 240, 258, 317],
20 |                 "7": [60, 96, 114, 177, 204, 240, 258, 317],
21 |                 "8": [60, 96, 114, 174, 204, 240, 258, 317],
22 |                 "10": [2, 60, 96, 114, 174, 204, 240, 258, 317],
23 |                 "12": [],
24 |             },
25 |             filename="run_log.csv",
26 |             task=datasets.base.REG,
27 |             n_samples=376,
28 |             n_features=2,
29 |         )
30 | 
31 |     def __iter__(self):
32 |         return stream.iter_csv(
33 |             self.path,
34 |             target=["Pace", "Distance"],
35 |             converters={"Pace": float, "Distance": float},
36 |             parse_dates={"time": "%Y-%m-%d %H:%M:%S"},
37 |         )
38 | 


--------------------------------------------------------------------------------
/river/drift/datasets/uk_coal_employment.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import datasets, stream
 4 | 
 5 | from .base import ChangePointFileDataset
 6 | 
 7 | 
 8 | class UKCoalEmploy(ChangePointFileDataset):
 9 |     """Historic Employment in UK Coal Mines
10 | 
11 |     This is historic data obtained from the UK government.
12 |         We use the employment column for the number of workers employed in the British coal mines
13 |         Missing values in the data are replaced with the value of the preceding year.
14 | 
15 |     References
16 |     ----------
17 |     [^1]: https://www.gov.uk/government/statistical-data-sets/historical-coal-data-coal-production-availability-and-consumption
18 |     """
19 | 
20 |     def __init__(self):
21 |         super().__init__(
22 |             annotations={
23 |                 "6": [15, 28, 45, 60, 68, 80],
24 |                 "7": [18, 47, 81],
25 |                 "8": [],
26 |                 "9": [15, 27, 46, 68, 81],
27 |                 "13": [19, 28, 45, 68, 80],
28 |             },
29 |             filename="uk_coal_employment.csv",
30 |             task=datasets.base.REG,
31 |             n_samples=105,
32 |             n_features=1,
33 |         )
34 | 
35 |     def __iter__(self):
36 |         return stream.iter_csv(
37 |             self.path,
38 |             target="Employment",
39 |             converters={
40 |                 "Employment": int,
41 |             },
42 |             parse_dates={"Year": "%Y"},
43 |         )
44 | 


--------------------------------------------------------------------------------
/river/ensemble/__init__.py:
--------------------------------------------------------------------------------
 1 | """Ensemble learning.
 2 | 
 3 | Broadly speaking, there are two kinds of ensemble approaches. There are those that copy a single
 4 | model several times and aggregate the predictions of said copies. This includes bagging as well as
 5 | boosting. Then there are those that are composed of an arbitrary list of models, and can therefore
 6 | aggregate predictions from different kinds of models.
 7 | 
 8 | """
 9 | 
10 | from __future__ import annotations
11 | 
12 | from .bagging import (
13 |     ADWINBaggingClassifier,
14 |     BaggingClassifier,
15 |     BaggingRegressor,
16 |     LeveragingBaggingClassifier,
17 | )
18 | from .boosting import AdaBoostClassifier, ADWINBoostingClassifier, BOLEClassifier
19 | from .ewa import EWARegressor
20 | from .stacking import StackingClassifier
21 | from .streaming_random_patches import SRPClassifier, SRPRegressor
22 | from .voting import VotingClassifier
23 | 
24 | __all__ = [
25 |     "AdaBoostClassifier",
26 |     "ADWINBaggingClassifier",
27 |     "ADWINBoostingClassifier",
28 |     "BaggingClassifier",
29 |     "BaggingRegressor",
30 |     "BOLEClassifier",
31 |     "EWARegressor",
32 |     "LeveragingBaggingClassifier",
33 |     "SRPClassifier",
34 |     "SRPRegressor",
35 |     "StackingClassifier",
36 |     "VotingClassifier",
37 | ]
38 | 


--------------------------------------------------------------------------------
/river/evaluate/__init__.py:
--------------------------------------------------------------------------------
 1 | """Model evaluation.
 2 | 
 3 | This module provides utilities to evaluate an online model. The goal is to reproduce a real-world
 4 | scenario with high fidelity. The core function of this module is `progressive_val_score`, which
 5 | allows to evaluate a model via progressive validation.
 6 | 
 7 | This module also exposes "tracks". A track is a predefined combination of a dataset and one or more
 8 | metrics. This allows a principled manner to compare models with each other. For instance,
 9 | the `RegressionTrack` contains several datasets and metrics to evaluate regression models. There is
10 | also a bare `Track` class to implement a custom track. The `benchmarks` directory at the root of
11 | the River repository uses these tracks.
12 | 
13 | """
14 | 
15 | from __future__ import annotations
16 | 
17 | from .progressive_validation import iter_progressive_val_score, progressive_val_score
18 | from .tracks import BinaryClassificationTrack, MultiClassClassificationTrack, RegressionTrack, Track
19 | 
20 | __all__ = [
21 |     "iter_progressive_val_score",
22 |     "progressive_val_score",
23 |     "BinaryClassificationTrack",
24 |     "MultiClassClassificationTrack",
25 |     "RegressionTrack",
26 |     "Track",
27 | ]
28 | 


--------------------------------------------------------------------------------
/river/facto/__init__.py:
--------------------------------------------------------------------------------
 1 | """Factorization machines."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from .ffm import FFMClassifier, FFMRegressor
 6 | from .fm import FMClassifier, FMRegressor
 7 | from .fwfm import FwFMClassifier, FwFMRegressor
 8 | from .hofm import HOFMClassifier, HOFMRegressor
 9 | 
10 | __all__ = [
11 |     "FFMClassifier",
12 |     "FFMRegressor",
13 |     "FMClassifier",
14 |     "FMRegressor",
15 |     "FwFMClassifier",
16 |     "FwFMRegressor",
17 |     "HOFMClassifier",
18 |     "HOFMRegressor",
19 | ]
20 | 


--------------------------------------------------------------------------------
/river/feature_extraction/__init__.py:
--------------------------------------------------------------------------------
 1 | """Feature extraction.
 2 | 
 3 | This module can be used to extract information from raw features. This includes encoding
 4 | categorical data as well as looking at interactions between existing features. This differs from
 5 | the `preprocessing` module, in that the latter's purpose is rather to clean the data so that it may
 6 | be processed by a particular machine learning algorithm.
 7 | 
 8 | """
 9 | 
10 | from __future__ import annotations
11 | 
12 | from .agg import Agg, TargetAgg
13 | from .kernel_approx import RBFSampler
14 | from .poly import PolynomialExtender
15 | from .vectorize import TFIDF, BagOfWords
16 | 
17 | __all__ = [
18 |     "Agg",
19 |     "BagOfWords",
20 |     "PolynomialExtender",
21 |     "RBFSampler",
22 |     "TargetAgg",
23 |     "TFIDF",
24 | ]
25 | 


--------------------------------------------------------------------------------
/river/feature_extraction/test_vectorize.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import pytest
 4 | 
 5 | from river import feature_extraction
 6 | 
 7 | 
 8 | @pytest.mark.parametrize(
 9 |     "params, text, expected_ngrams",
10 |     [
11 |         pytest.param(
12 |             *case,
13 |             id=f"#{i}",
14 |         )
15 |         for i, case in enumerate(
16 |             [
17 |                 ({}, "one two three", ["one", "two", "three"]),
18 |                 (
19 |                     {},
20 |                     """one   two\tthree four\t\tfive
21 |             six
22 | 
23 |             seven""",
24 |                     ["one", "two", "three", "four", "five", "six", "seven"],
25 |                 ),
26 |                 (
27 |                     {"ngram_range": (1, 2)},
28 |                     "one two three",
29 |                     ["one", "two", "three", ("one", "two"), ("two", "three")],
30 |                 ),
31 |                 ({"ngram_range": (2, 2)}, "one two three", [("one", "two"), ("two", "three")]),
32 |                 (
33 |                     {"ngram_range": (2, 3)},
34 |                     "one two three",
35 |                     [("one", "two"), ("two", "three"), ("one", "two", "three")],
36 |                 ),
37 |                 ({"stop_words": {"two", "three"}}, "one two three four", ["one", "four"]),
38 |                 (
39 |                     {"stop_words": {"two", "three"}, "ngram_range": (1, 2)},
40 |                     "one two three four",
41 |                     ["one", "four", ("one", "four")],
42 |                 ),
43 |             ]
44 |         )
45 |     ],
46 | )
47 | def test_ngrams(params, text, expected_ngrams):
48 |     bow = feature_extraction.BagOfWords(**params)
49 |     ngrams = list(bow.process_text(text))
50 |     assert expected_ngrams == ngrams
51 | 


--------------------------------------------------------------------------------
/river/feature_selection/__init__.py:
--------------------------------------------------------------------------------
 1 | """Feature selection."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from .k_best import SelectKBest
 6 | from .random import PoissonInclusion
 7 | from .variance import VarianceThreshold
 8 | 
 9 | __all__ = ["PoissonInclusion", "SelectKBest", "VarianceThreshold"]
10 | 


--------------------------------------------------------------------------------
/river/forest/__init__.py:
--------------------------------------------------------------------------------
 1 | """This module implements forest-based classifiers and regressors."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from .adaptive_random_forest import ARFClassifier, ARFRegressor
 6 | from .aggregated_mondrian_forest import AMFClassifier, AMFRegressor
 7 | from .online_extra_trees import OXTRegressor
 8 | 
 9 | __all__ = [
10 |     "ARFClassifier",
11 |     "ARFRegressor",
12 |     "AMFClassifier",
13 |     "AMFRegressor",
14 |     "OXTRegressor",
15 | ]
16 | 


--------------------------------------------------------------------------------
/river/forest/test_amf.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | 
 4 | def test_issue_1272():
 5 |     """
 6 | 
 7 |     https://github.com/online-ml/river/issues/1272
 8 | 
 9 |     >>> import river
10 |     >>> from river import forest, metrics
11 | 
12 |     >>> model = forest.ARFClassifier(metric=metrics.CrossEntropy())
13 |     >>> model.learn_one({"x": 1}, True)
14 | 
15 |     >>> model = forest.ARFClassifier()
16 |     >>> model.learn_one({"x": 1}, True)
17 | 
18 |     """
19 | 


--------------------------------------------------------------------------------
/river/imblearn/__init__.py:
--------------------------------------------------------------------------------
 1 | """Sampling methods."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from .chebyshev import ChebyshevOverSampler, ChebyshevUnderSampler
 6 | from .hard_sampling import HardSamplingClassifier, HardSamplingRegressor
 7 | from .random import RandomOverSampler, RandomSampler, RandomUnderSampler
 8 | 
 9 | __all__ = [
10 |     "ChebyshevOverSampler",
11 |     "ChebyshevUnderSampler",
12 |     "HardSamplingClassifier",
13 |     "HardSamplingRegressor",
14 |     "RandomOverSampler",
15 |     "RandomUnderSampler",
16 |     "RandomSampler",
17 | ]
18 | 


--------------------------------------------------------------------------------
/river/linear_model/__init__.py:
--------------------------------------------------------------------------------
 1 | """Linear models."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from . import base
 6 | from .alma import ALMAClassifier
 7 | from .bayesian_lin_reg import BayesianLinearRegression
 8 | from .lin_reg import LinearRegression
 9 | from .log_reg import LogisticRegression
10 | from .pa import PAClassifier, PARegressor
11 | from .perceptron import Perceptron
12 | from .softmax import SoftmaxRegression
13 | 
14 | __all__ = [
15 |     "base",
16 |     "ALMAClassifier",
17 |     "BayesianLinearRegression",
18 |     "LinearRegression",
19 |     "LogisticRegression",
20 |     "PAClassifier",
21 |     "PARegressor",
22 |     "Perceptron",
23 |     "SoftmaxRegression",
24 | ]
25 | 


--------------------------------------------------------------------------------
/river/metrics/accuracy.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import metrics
 4 | 
 5 | __all__ = ["Accuracy"]
 6 | 
 7 | 
 8 | class Accuracy(metrics.base.MultiClassMetric):
 9 |     """Accuracy score, which is the percentage of exact matches.
10 | 
11 |     Parameters
12 |     ----------
13 |     cm
14 |         This parameter allows sharing the same confusion
15 |         matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage
16 |         and computation time.
17 | 
18 |     Examples
19 |     --------
20 | 
21 |     >>> from river import metrics
22 | 
23 |     >>> y_true = [True, False, True, True, True]
24 |     >>> y_pred = [True, True, False, True, True]
25 | 
26 |     >>> metric = metrics.Accuracy()
27 |     >>> for yt, yp in zip(y_true, y_pred):
28 |     ...     metric.update(yt, yp)
29 | 
30 |     >>> metric
31 |     Accuracy: 60.00%
32 | 
33 |     """
34 | 
35 |     def get(self):
36 |         try:
37 |             return self.cm.total_true_positives / self.cm.total_weight
38 |         except ZeroDivisionError:
39 |             return 0.0
40 | 


--------------------------------------------------------------------------------
/river/metrics/balanced_accuracy.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import metrics
 4 | 
 5 | __all__ = ["BalancedAccuracy"]
 6 | 
 7 | 
 8 | class BalancedAccuracy(metrics.base.MultiClassMetric):
 9 |     """Balanced accuracy.
10 | 
11 |     Balanced accuracy is the average of recall obtained on each class. It is used to
12 |     deal with imbalanced datasets in binary and multi-class classification problems.
13 | 
14 |     Parameters
15 |     ----------
16 |     cm
17 |         This parameter allows sharing the same confusion
18 |         matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage
19 |         and computation time.
20 | 
21 |     Examples
22 |     --------
23 | 
24 |         >>> from river import metrics
25 |         >>> y_true = [True, False, True, True, False, True]
26 |         >>> y_pred = [True, False, True, True, True, False]
27 | 
28 |         >>> metric = metrics.BalancedAccuracy()
29 |         >>> for yt, yp in zip(y_true, y_pred):
30 |         ...     metric.update(yt, yp)
31 | 
32 |         >>> metric
33 |         BalancedAccuracy: 62.50%
34 | 
35 |         >>> y_true = [0, 1, 0, 0, 1, 0]
36 |         >>> y_pred = [0, 1, 0, 0, 0, 1]
37 |         >>> metric = metrics.BalancedAccuracy()
38 |         >>> for yt, yp in zip(y_true, y_pred):
39 |         ...     metric.update(yt, yp)
40 | 
41 |         >>> metric
42 |         BalancedAccuracy: 62.50%
43 | 
44 |     """
45 | 
46 |     def get(self):
47 |         total = 0
48 |         for c in self.cm.classes:
49 |             try:
50 |                 total += self.cm[c][c] / self.cm.sum_row[c]
51 |             except ZeroDivisionError:
52 |                 continue
53 |         try:
54 |             score = total / len(self.cm.classes)
55 | 
56 |             return score
57 | 
58 |         except ZeroDivisionError:
59 |             return 0.0
60 | 


--------------------------------------------------------------------------------
/river/metrics/cross_entropy.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | 
 5 | from river import metrics, utils
 6 | 
 7 | __all__ = ["CrossEntropy"]
 8 | 
 9 | 
10 | class CrossEntropy(metrics.base.MeanMetric, metrics.base.MultiClassMetric):
11 |     """Multiclass generalization of the logarithmic loss.
12 | 
13 |     Examples
14 |     --------
15 | 
16 |     >>> from river import metrics
17 | 
18 |     >>> y_true = [0, 1, 2, 2]
19 |     >>> y_pred = [
20 |     ...     {0: 0.29450637, 1: 0.34216758, 2: 0.36332605},
21 |     ...     {0: 0.21290077, 1: 0.32728332, 2: 0.45981591},
22 |     ...     {0: 0.42860913, 1: 0.33380113, 2: 0.23758974},
23 |     ...     {0: 0.44941979, 1: 0.32962558, 2: 0.22095463}
24 |     ... ]
25 | 
26 |     >>> metric = metrics.CrossEntropy()
27 | 
28 |     >>> for yt, yp in zip(y_true, y_pred):
29 |     ...     metric.update(yt, yp)
30 |     ...     print(metric.get())
31 |     1.222454
32 |     1.169691
33 |     1.258864
34 |     1.321597
35 | 
36 |     >>> metric
37 |     CrossEntropy: 1.321598
38 | 
39 |     """
40 | 
41 |     _fmt = ""
42 | 
43 |     @property
44 |     def bigger_is_better(self):
45 |         return False
46 | 
47 |     @property
48 |     def requires_labels(self):
49 |         return False
50 | 
51 |     def _eval(self, y_true, y_pred):
52 |         total = 0
53 | 
54 |         for label, proba in y_pred.items():
55 |             if y_true == label:
56 |                 total += math.log(utils.math.clamp(x=proba, minimum=1e-15, maximum=1 - 1e-15))
57 | 
58 |         return -total
59 | 


--------------------------------------------------------------------------------
/river/metrics/efficient_rollingrocauc/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | from .efficient_rollingrocauc import EfficientRollingROCAUC
4 | 
5 | __all__ = ["EfficientRollingROCAUC"]
6 | 


--------------------------------------------------------------------------------
/river/metrics/efficient_rollingrocauc/cpp/RollingROCAUC.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef ROLLINGROCAUC_HPP
 2 | #define ROLLINGROCAUC_HPP
 3 | 
 4 | #include <deque>
 5 | #include <set>
 6 | #include <tuple>
 7 | #include <vector>
 8 | 
 9 | namespace rollingrocauc {
10 | 
11 | class RollingROCAUC {
12 |     public:
13 |         RollingROCAUC();
14 |         RollingROCAUC(const int positiveLabel, const long unsigned windowSize);
15 | 
16 |         virtual ~RollingROCAUC() = default;
17 | 
18 |         // Calls insert() and removeLast if needed
19 |         virtual void update(const int label, const double score);
20 | 
21 |         // Erase the most recent instance with content equal to params
22 |         virtual void revert(const int label, const double score);
23 | 
24 |         // Calculates the ROCAUC and return it
25 |         virtual double get() const;
26 | 
27 |         // Returns y_true as a vector
28 |         virtual std::vector<int> getTrueLabels() const;
29 | 
30 |         // Returns y_score as a vector
31 |         virtual std::vector<double> getScores() const;
32 | 
33 |     private:
34 |         // Insert instance based on params
35 |         virtual void insert(const int label, const double score);
36 | 
37 |         // Remove oldest instance
38 |         virtual void removeLast();
39 | 
40 |         int positiveLabel;
41 | 
42 |         std::size_t windowSize;
43 |         std::size_t positives;
44 | 
45 |         // window maintains a queue of the instances to store the temporal
46 |         // aspect of the stream. Using deque to allow revert()
47 |         std::deque<std::tuple<double, int>> window;
48 | 
49 |         // orderedWindow maintains a multiset (implemented as a tree) to store
50 |         // the instances sorted
51 |         std::multiset<std::tuple<double, int>> orderedWindow;
52 | };
53 | 
54 | } // namespace rollingrocauc
55 | 
56 | #endif
57 | 


--------------------------------------------------------------------------------
/river/metrics/efficient_rollingrocauc/efficient_rollingrocauc.pxd:
--------------------------------------------------------------------------------
 1 | from libcpp.vector cimport vector
 2 | 
 3 | cdef extern from "cpp/RollingROCAUC.cpp":
 4 |     pass
 5 | 
 6 | cdef extern from "cpp/RollingROCAUC.hpp" namespace "rollingrocauc":
 7 |     cdef cppclass RollingROCAUC:
 8 |         RollingROCAUC(int positiveLabel, int windowSize) except +
 9 |         void update(int label, double score)
10 |         void revert(int label, double score)
11 |         double get()
12 |         vector[int] getTrueLabels()
13 |         vector[double] getScores()
14 | 


--------------------------------------------------------------------------------
/river/metrics/efficient_rollingrocauc/efficient_rollingrocauc.pyi:
--------------------------------------------------------------------------------
 1 | from collections.abc import Sequence
 2 | from typing import Any
 3 | 
 4 | class EfficientRollingROCAUC:
 5 |     def __cinit__(self, positiveLabel: int, windowSize: int) -> None: ...
 6 |     def __dealloc__(self) -> None: ...
 7 |     def update(self, label: bool, score: bool | float | dict[bool, float]) -> None: ...
 8 |     def revert(self, label: bool, score: bool | float | dict[bool, float]) -> None: ...
 9 |     def get(self) -> float: ...
10 |     def __getnewargs_ex__(self) -> tuple[tuple[int, int], dict[str, Any]]: ...
11 |     def __getstate__(self) -> tuple[Sequence[int], Sequence[float]]: ...
12 |     def __setstate__(self, state: tuple[Sequence[int], Sequence[float]]) -> None: ...
13 | 


--------------------------------------------------------------------------------
/river/metrics/expected_mutual_info.pyi:
--------------------------------------------------------------------------------
1 | from river import metrics
2 | 
3 | def expected_mutual_info(confusion_matrix: metrics.ConfusionMatrix) -> float: ...
4 | 


--------------------------------------------------------------------------------
/river/metrics/log_loss.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | 
 5 | from river import metrics
 6 | 
 7 | __all__ = ["LogLoss"]
 8 | 
 9 | 
10 | class LogLoss(metrics.base.MeanMetric, metrics.base.BinaryMetric):
11 |     """Binary logarithmic loss.
12 | 
13 |     Examples
14 |     --------
15 | 
16 |     >>> from river import metrics
17 | 
18 |     >>> y_true = [True, False, False, True]
19 |     >>> y_pred = [0.9,  0.1,   0.2,   0.65]
20 | 
21 |     >>> metric = metrics.LogLoss()
22 |     >>> for yt, yp in zip(y_true, y_pred):
23 |     ...     metric.update(yt, yp)
24 |     ...     print(metric.get())
25 |     0.105360
26 |     0.105360
27 |     0.144621
28 |     0.216161
29 | 
30 |     >>> metric
31 |     LogLoss: 0.216162
32 | 
33 |     """
34 | 
35 |     _fmt = ""
36 | 
37 |     @property
38 |     def bigger_is_better(self):
39 |         return False
40 | 
41 |     @property
42 |     def requires_labels(self):
43 |         return False
44 | 
45 |     def _eval(self, y_true, y_pred):
46 |         p_true = y_pred.get(True, 0.0) if isinstance(y_pred, dict) else y_pred
47 |         p_true = self._clamp_proba(p_true)
48 |         if y_true:
49 |             return -math.log(p_true)
50 |         return -math.log(1 - p_true)
51 | 


--------------------------------------------------------------------------------
/river/metrics/mae.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import metrics
 4 | 
 5 | __all__ = ["MAE"]
 6 | 
 7 | 
 8 | class MAE(metrics.base.MeanMetric, metrics.base.RegressionMetric):
 9 |     """Mean absolute error.
10 | 
11 |     Examples
12 |     --------
13 | 
14 |     >>> from river import metrics
15 | 
16 |     >>> y_true = [3, -0.5, 2, 7]
17 |     >>> y_pred = [2.5, 0.0, 2, 8]
18 | 
19 |     >>> metric = metrics.MAE()
20 | 
21 |     >>> for yt, yp in zip(y_true, y_pred):
22 |     ...     metric.update(yt, yp)
23 |     ...     print(metric.get())
24 |     0.5
25 |     0.5
26 |     0.333
27 |     0.5
28 | 
29 |     >>> metric
30 |     MAE: 0.5
31 | 
32 |     """
33 | 
34 |     def _eval(self, y_true, y_pred):
35 |         return abs(y_true - y_pred)
36 | 


--------------------------------------------------------------------------------
/river/metrics/mape.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import metrics
 4 | 
 5 | __all__ = ["MAPE"]
 6 | 
 7 | 
 8 | class MAPE(metrics.base.MeanMetric, metrics.base.RegressionMetric):
 9 |     """Mean absolute percentage error.
10 | 
11 |     Examples
12 |     --------
13 | 
14 |     >>> from river import metrics
15 | 
16 |     >>> y_true = [3, -0.5, 2, 7]
17 |     >>> y_pred = [2.5, 0.0, 2, 8]
18 | 
19 |     >>> metric = metrics.MAPE()
20 |     >>> for yt, yp in zip(y_true, y_pred):
21 |     ...     metric.update(yt, yp)
22 | 
23 |     >>> metric
24 |     MAPE: 32.738095
25 | 
26 |     """
27 | 
28 |     def _eval(self, y_true, y_pred):
29 |         if y_true == 0:
30 |             return 0.0
31 |         return abs(y_true - y_pred) / abs(y_true)
32 | 
33 |     def get(self):
34 |         return 100 * super().get()
35 | 


--------------------------------------------------------------------------------
/river/metrics/mcc.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | 
 5 | from river import metrics
 6 | 
 7 | __all__ = ["MCC"]
 8 | 
 9 | 
10 | class MCC(metrics.base.BinaryMetric):
11 |     """Matthews correlation coefficient.
12 | 
13 |     Parameters
14 |     ----------
15 |     cm
16 |         This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a
17 |         confusion matrix reduces the amount of storage and computation time.
18 |     pos_val
19 |         Value to treat as "positive".
20 | 
21 |     Examples
22 |     --------
23 | 
24 |     >>> from river import metrics
25 | 
26 |     >>> y_true = [True, True, True, False]
27 |     >>> y_pred = [True, False, True, True]
28 | 
29 |     >>> mcc = metrics.MCC()
30 | 
31 |     >>> for yt, yp in zip(y_true, y_pred):
32 |     ...     mcc.update(yt, yp)
33 | 
34 |     >>> mcc
35 |     MCC: -0.333333
36 | 
37 |     References
38 |     ----------
39 |     [^1]: [Wikipedia article](https://www.wikiwand.com/en/Matthews_correlation_coefficient)
40 | 
41 |     """
42 | 
43 |     _fmt = ""
44 | 
45 |     def get(self):
46 |         tp = self.cm.true_positives(self.pos_val)
47 |         tn = self.cm.true_negatives(self.pos_val)
48 |         fp = self.cm.false_positives(self.pos_val)
49 |         fn = self.cm.false_negatives(self.pos_val)
50 | 
51 |         n = (tp + tn + fp + fn) or 1
52 |         s = (tp + fn) / n
53 |         p = (tp + fp) / n
54 | 
55 |         try:
56 |             return (tp / n - s * p) / math.sqrt(p * s * (1 - s) * (1 - p))
57 |         except ZeroDivisionError:
58 |             return 0.0
59 | 


--------------------------------------------------------------------------------
/river/metrics/multioutput/__init__.py:
--------------------------------------------------------------------------------
 1 | """Metrics for multi-output learning."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from . import base
 6 | from .confusion import MultiLabelConfusionMatrix
 7 | from .exact_match import ExactMatch
 8 | from .macro import MacroAverage
 9 | from .micro import MicroAverage
10 | from .per_output import PerOutput
11 | from .sample_average import SampleAverage
12 | 
13 | __all__ = [
14 |     "base",
15 |     "MacroAverage",
16 |     "MultiLabelConfusionMatrix",
17 |     "ExactMatch",
18 |     "MicroAverage",
19 |     "PerOutput",
20 |     "SampleAverage",
21 | ]
22 | 


--------------------------------------------------------------------------------
/river/metrics/multioutput/exact_match.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import metrics
 4 | from river.metrics.multioutput.base import MultiOutputClassificationMetric
 5 | 
 6 | __all__ = ["ExactMatch"]
 7 | 
 8 | 
 9 | class ExactMatch(metrics.base.MeanMetric, MultiOutputClassificationMetric):
10 |     """Exact match score.
11 | 
12 |     This is the most strict multi-label metric, defined as the number of
13 |     samples that have all their labels correctly classified, divided by the
14 |     total number of samples.
15 | 
16 |     Parameters
17 |     ----------
18 |     cm
19 |         This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a
20 |         confusion matrix reduces the amount of storage and computation time.
21 | 
22 |     Examples
23 |     --------
24 | 
25 |     >>> from river import metrics
26 | 
27 |     >>> y_true = [
28 |     ...     {0: False, 1: True, 2: True},
29 |     ...     {0: True, 1: True, 2: False},
30 |     ...     {0: True, 1: True, 2: False},
31 |     ... ]
32 | 
33 |     >>> y_pred = [
34 |     ...     {0: True, 1: True, 2: True},
35 |     ...     {0: True, 1: False, 2: False},
36 |     ...     {0: True, 1: True, 2: False},
37 |     ... ]
38 | 
39 |     >>> metric = metrics.multioutput.ExactMatch()
40 |     >>> for yt, yp in zip(y_true, y_pred):
41 |     ...     metric.update(yt, yp)
42 | 
43 |     >>> metric
44 |     ExactMatch: 33.33%
45 | 
46 |     """
47 | 
48 |     def _eval(self, y_true, y_pred):
49 |         return y_true == y_pred
50 | 


--------------------------------------------------------------------------------
/river/metrics/multioutput/macro.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import statistics
 4 | from collections import defaultdict
 5 | from copy import deepcopy
 6 | from functools import partial
 7 | 
 8 | from river import metrics, utils
 9 | from river.metrics.multioutput.base import MultiOutputMetric
10 | 
11 | __all__ = ["MacroAverage"]
12 | 
13 | 
14 | class MacroAverage(MultiOutputMetric, metrics.base.WrapperMetric):
15 |     """Macro-average wrapper.
16 | 
17 |     A copy of the provided metric is made for each output. The arithmetic average of all the
18 |     metrics is returned.
19 | 
20 |     Parameters
21 |     ----------
22 |     metric
23 |         A classification or a regression metric.
24 | 
25 |     """
26 | 
27 |     def __init__(self, metric):
28 |         self._metric = metric
29 |         self.metrics = defaultdict(partial(deepcopy, self._metric))
30 | 
31 |     @property
32 |     def metric(self):
33 |         return self._metric
34 | 
35 |     def works_with(self, model) -> bool:
36 |         if isinstance(self.metric, metrics.base.ClassificationMetric):
37 |             return utils.inspect.ismoclassifier(model)
38 |         return utils.inspect.ismoregressor(model)
39 | 
40 |     def update(self, y_true, y_pred, w=1.0):
41 |         for i in y_true:
42 |             self.metrics[i].update(y_true[i], y_pred[i], w)
43 | 
44 |     def revert(self, y_true, y_pred, w=1.0):
45 |         for i in y_true:
46 |             self.metrics[i].revert(y_true[i], y_pred[i], w)
47 | 
48 |     def get(self):
49 |         return statistics.mean(metric.get() for metric in self.metrics.values())
50 | 


--------------------------------------------------------------------------------
/river/metrics/multioutput/micro.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import metrics, utils
 4 | from river.metrics.multioutput.base import MultiOutputMetric
 5 | 
 6 | __all__ = ["MicroAverage"]
 7 | 
 8 | 
 9 | class MicroAverage(MultiOutputMetric, metrics.base.WrapperMetric):
10 |     """Micro-average wrapper.
11 | 
12 |     The provided metric is updated with the value of each output.
13 | 
14 |     Parameters
15 |     ----------
16 |     metric
17 |         A classification or a regression metric.
18 | 
19 |     """
20 | 
21 |     def __init__(self, metric):
22 |         self._metric = metric
23 | 
24 |     @property
25 |     def metric(self):
26 |         return self._metric
27 | 
28 |     def works_with(self, model) -> bool:
29 |         if isinstance(self.metric, metrics.base.ClassificationMetric):
30 |             return utils.inspect.ismoclassifier(model)
31 |         return utils.inspect.ismoregressor(model)
32 | 
33 |     def update(self, y_true, y_pred, w=1.0):
34 |         for i in y_true:
35 |             self.metric.update(y_true[i], y_pred[i], w)
36 | 
37 |     def revert(self, y_true, y_pred, w=1.0):
38 |         for i in y_true:
39 |             self.metric.revert(y_true[i], y_pred[i], w)
40 | 
41 |     def get(self):
42 |         return self.metric.get()
43 | 


--------------------------------------------------------------------------------
/river/metrics/multioutput/per_output.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from collections import defaultdict
 4 | from copy import deepcopy
 5 | from functools import partial
 6 | 
 7 | from river import metrics, utils
 8 | from river.metrics.multioutput.base import MultiOutputMetric
 9 | 
10 | __all__ = ["PerOutput"]
11 | 
12 | 
13 | class PerOutput(MultiOutputMetric, metrics.base.WrapperMetric):
14 |     """Per-output wrapper.
15 | 
16 |     A copy of the metric is maintained for each output.
17 | 
18 |     Parameters
19 |     ----------
20 |     metric
21 |         A classification or a regression metric.
22 | 
23 |     """
24 | 
25 |     def __init__(self, metric):
26 |         self._metric = metric
27 |         self.metrics = defaultdict(partial(deepcopy, self._metric))
28 | 
29 |     @property
30 |     def metric(self):
31 |         return self._metric
32 | 
33 |     def works_with(self, model) -> bool:
34 |         if isinstance(self.metric, metrics.base.ClassificationMetric):
35 |             return utils.inspect.ismoclassifier(model)
36 |         return utils.inspect.ismoregressor(model)
37 | 
38 |     def update(self, y_true, y_pred, w=1.0):
39 |         for i in y_true:
40 |             self.metrics[i].update(y_true[i], y_pred[i], w)
41 | 
42 |     def revert(self, y_true, y_pred, w=1.0):
43 |         for i in y_true:
44 |             self.metrics[i].revert(y_true[i], y_pred[i], w)
45 | 
46 |     def get(self):
47 |         return dict(self.metrics)
48 | 
49 |     def __repr__(self):
50 |         return "\n".join(f"{i} - {metric}" for i, metric in self.metrics.items())
51 | 


--------------------------------------------------------------------------------
/river/metrics/smape.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import metrics
 4 | 
 5 | __all__ = ["SMAPE"]
 6 | 
 7 | 
 8 | class SMAPE(metrics.base.MeanMetric, metrics.base.RegressionMetric):
 9 |     """Symmetric mean absolute percentage error.
10 | 
11 |     Examples
12 |     --------
13 | 
14 |     >>> from river import metrics
15 | 
16 |     >>> y_true = [0, 0.07533, 0.07533, 0.07533, 0.07533, 0.07533, 0.07533, 0.0672, 0.0672]
17 |     >>> y_pred = [0, 0.102, 0.107, 0.047, 0.1, 0.032, 0.047, 0.108, 0.089]
18 | 
19 |     >>> metric = metrics.SMAPE()
20 |     >>> for yt, yp in zip(y_true, y_pred):
21 |     ...     metric.update(yt, yp)
22 | 
23 |     >>> metric
24 |     SMAPE: 37.869392
25 | 
26 |     """
27 | 
28 |     def _eval(self, y_true, y_pred):
29 |         den = abs(y_true) + abs(y_pred)
30 |         if den == 0:
31 |             return 0.0
32 |         return 2.0 * abs(y_true - y_pred) / den
33 | 
34 |     def get(self):
35 |         return 100 * super().get()
36 | 


--------------------------------------------------------------------------------
/river/metrics/test_confusion.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import datasets, evaluate, linear_model, metrics, optim, preprocessing
 4 | 
 5 | 
 6 | def test_issue_1443():
 7 |     dataset = datasets.Phishing()
 8 | 
 9 |     model = preprocessing.StandardScaler() | linear_model.LogisticRegression(
10 |         optimizer=optim.SGD(0.1)
11 |     )
12 | 
13 |     metric = metrics.ConfusionMatrix()
14 | 
15 |     for _ in evaluate.iter_progressive_val_score(dataset, model, metric):
16 |         pass
17 | 
18 | 
19 | def test_confusion_and_other_metrics():
20 |     """
21 | 
22 |     >>> dataset = datasets.Phishing()
23 | 
24 |     >>> model = preprocessing.StandardScaler() | linear_model.LogisticRegression(
25 |     ...     optimizer=optim.SGD(0.1)
26 |     ... )
27 | 
28 |     >>> metric = metrics.ConfusionMatrix() + metrics.F1() + metrics.Accuracy()
29 | 
30 |     >>> evaluate.progressive_val_score(dataset, model, metric)
31 |             False   True
32 |     False     613     89
33 |      True      49    499
34 |     F1: 87.85%
35 |     Accuracy: 88.96%
36 | 
37 |     """
38 | 


--------------------------------------------------------------------------------
/river/metrics/test_cross_entropy.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | 
 5 | from sklearn import metrics as sk_metrics
 6 | 
 7 | from river import metrics
 8 | 
 9 | 
10 | def test_cross_entropy():
11 |     metric = metrics.CrossEntropy()
12 | 
13 |     y_true = [0, 1, 2, 2]
14 |     y_pred = [
15 |         [0.29450637, 0.34216758, 0.36332605],
16 |         [0.21290077, 0.32728332, 0.45981591],
17 |         [0.42860913, 0.33380113, 0.23758974],
18 |         [0.44941979, 0.32962558, 0.22095463],
19 |     ]
20 | 
21 |     for i, (yt, yp) in enumerate(zip(y_true, y_pred)):
22 |         yp = dict(enumerate(yp))
23 |         metric.update(yt, yp)
24 | 
25 |         if i >= 1:
26 |             assert math.isclose(
27 |                 metric.get(),
28 |                 sk_metrics.log_loss(y_true[: i + 1], y_pred[: i + 1], labels=[0, 1, 2]),
29 |             )
30 | 
31 |     metric.revert(y_true[-1], dict(enumerate(y_pred[-1])))
32 |     assert math.isclose(metric.get(), sk_metrics.log_loss(y_true[:-1], y_pred[:-1]))
33 | 


--------------------------------------------------------------------------------
/river/metrics/test_log_loss.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | 
 5 | from sklearn import metrics as sk_metrics
 6 | 
 7 | from river import metrics
 8 | 
 9 | 
10 | def test_log_loss():
11 |     metric = metrics.LogLoss()
12 | 
13 |     y_true = [True, False, False, True]
14 |     y_pred = [0.9, 0.1, 0.2, 0.65]
15 | 
16 |     for i, (yt, yp) in enumerate(zip(y_true, y_pred)):
17 |         metric.update(yt, yp)
18 | 
19 |         if i >= 1:
20 |             assert math.isclose(metric.get(), sk_metrics.log_loss(y_true[: i + 1], y_pred[: i + 1]))
21 | 
22 |     metric.revert(y_true[-1], y_pred[-1])
23 |     assert math.isclose(metric.get(), sk_metrics.log_loss(y_true[:-1], y_pred[:-1]))
24 | 


--------------------------------------------------------------------------------
/river/misc/__init__.py:
--------------------------------------------------------------------------------
 1 | """Miscellaneous.
 2 | 
 3 | This module essentially regroups some implementations that have nowhere else to go.
 4 | 
 5 | """
 6 | 
 7 | from __future__ import annotations
 8 | 
 9 | from .sdft import SDFT
10 | from .skyline import Skyline
11 | 
12 | __all__ = ["SDFT", "Skyline"]
13 | 


--------------------------------------------------------------------------------
/river/model_selection/__init__.py:
--------------------------------------------------------------------------------
 1 | """Model selection.
 2 | 
 3 | This module regroups a variety of methods that may be used for performing model selection. An
 4 | model selector is provided with a list of models. These are called "experts" in the expert learning
 5 | literature. The model selector's goal is to perform at least as well as the best model. Indeed,
 6 | initially, the best model is not known. The performance of each model becomes more apparent as time
 7 | goes by. Different strategies are possible, each one offering a different tradeoff in terms of
 8 | accuracy and computational performance.
 9 | 
10 | Model selection can be used for tuning the hyperparameters of a model. This may be done by creating
11 | a copy of the model for each set of hyperparameters, and treating each copy as a separate model.
12 | The `utils.expand_param_grid` function can be used for this purpose.
13 | 
14 | """
15 | 
16 | from __future__ import annotations
17 | 
18 | from . import base
19 | from .bandit import BanditClassifier, BanditRegressor
20 | from .greedy import GreedyRegressor
21 | from .sh import SuccessiveHalvingClassifier, SuccessiveHalvingRegressor
22 | 
23 | __all__ = [
24 |     "base",
25 |     "BanditClassifier",
26 |     "BanditRegressor",
27 |     "GreedyRegressor",
28 |     "SuccessiveHalvingClassifier",
29 |     "SuccessiveHalvingRegressor",
30 | ]
31 | 


--------------------------------------------------------------------------------
/river/multiclass/__init__.py:
--------------------------------------------------------------------------------
 1 | """Multi-class classification."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from .occ import OutputCodeClassifier
 6 | from .ovo import OneVsOneClassifier
 7 | from .ovr import OneVsRestClassifier
 8 | 
 9 | __all__ = ["OutputCodeClassifier", "OneVsOneClassifier", "OneVsRestClassifier"]
10 | 


--------------------------------------------------------------------------------
/river/multiclass/test_ovr.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import pandas as pd
 4 | 
 5 | from river import datasets, linear_model, metrics, multiclass, preprocessing, stream
 6 | 
 7 | 
 8 | def test_online_batch_consistent():
 9 |     # Batch
10 | 
11 |     batch = preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(
12 |         linear_model.LogisticRegression()
13 |     )
14 | 
15 |     dataset = datasets.ImageSegments()
16 | 
17 |     batch_metric = metrics.MacroF1()
18 | 
19 |     for i, x in enumerate(pd.read_csv(dataset.path, chunksize=1)):
20 |         y = x.pop("category")
21 |         y_pred = batch.predict_many(x)
22 |         batch.learn_many(x, y)
23 | 
24 |         for yt, yp in zip(y, y_pred):
25 |             if yp is not None:
26 |                 batch_metric.update(yt, yp)
27 | 
28 |         if i == 30:
29 |             break
30 | 
31 |     # Online
32 | 
33 |     online = preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(
34 |         linear_model.LogisticRegression()
35 |     )
36 | 
37 |     online_metric = metrics.MacroF1()
38 | 
39 |     X = pd.read_csv(dataset.path)
40 |     Y = X.pop("category")
41 | 
42 |     for i, (x, y) in enumerate(stream.iter_pandas(X, Y)):
43 |         y_pred = online.predict_one(x)
44 |         online.learn_one(x, y)
45 | 
46 |         if y_pred is not None:
47 |             online_metric.update(y, y_pred)
48 | 
49 |         if i == 30:
50 |             break
51 | 
52 |     assert online_metric.get() == batch_metric.get()
53 | 


--------------------------------------------------------------------------------
/river/multioutput/__init__.py:
--------------------------------------------------------------------------------
 1 | """Multi-output models."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from .chain import (
 6 |     ClassifierChain,
 7 |     MonteCarloClassifierChain,
 8 |     ProbabilisticClassifierChain,
 9 |     RegressorChain,
10 | )
11 | from .encoder import MultiClassEncoder
12 | 
13 | __all__ = [
14 |     "ClassifierChain",
15 |     "MonteCarloClassifierChain",
16 |     "MultiClassEncoder",
17 |     "ProbabilisticClassifierChain",
18 |     "RegressorChain",
19 | ]
20 | 


--------------------------------------------------------------------------------
/river/naive_bayes/__init__.py:
--------------------------------------------------------------------------------
 1 | """Naive Bayes algorithms."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from .bernoulli import BernoulliNB
 6 | from .complement import ComplementNB
 7 | from .gaussian import GaussianNB
 8 | from .multinomial import MultinomialNB
 9 | 
10 | __all__ = ["BernoulliNB", "ComplementNB", "GaussianNB", "MultinomialNB"]
11 | 


--------------------------------------------------------------------------------
/river/neighbors/__init__.py:
--------------------------------------------------------------------------------
 1 | """Neighbors-based learning.
 2 | 
 3 | Also known as *lazy* methods. In these methods, generalisation of the training data is delayed
 4 | until a query is received.
 5 | 
 6 | """
 7 | 
 8 | from __future__ import annotations
 9 | 
10 | from .ann import SWINN
11 | from .knn_classifier import KNNClassifier
12 | from .knn_regressor import KNNRegressor
13 | from .lazy import LazySearch
14 | 
15 | __all__ = [
16 |     "LazySearch",
17 |     "KNNClassifier",
18 |     "KNNRegressor",
19 |     "SWINN",
20 | ]
21 | 


--------------------------------------------------------------------------------
/river/neighbors/ann/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | from .swinn import SWINN
4 | 
5 | __all__ = ["SWINN"]
6 | 


--------------------------------------------------------------------------------
/river/neighbors/base.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import abc
 4 | import typing
 5 | 
 6 | from river import base
 7 | 
 8 | 
 9 | class DistanceFunc(typing.Protocol):
10 |     def __call__(self, a: typing.Any, b: typing.Any, **kwargs) -> float: ...
11 | 
12 | 
13 | class FunctionWrapper:
14 |     """Wrapper used to make distance function work with KNNClassifier and
15 |     KNNRegressor.
16 | 
17 |     The k-NN-based classifier and regressor store tuples with `(x, y)`, but only
18 |     `x` is used for distance calculations. This wrapper makes sure `x` is accessed
19 |     when calculating the distances.
20 | 
21 |     Parameters
22 |     ----------
23 |     distance_function
24 |         The custom distance function to be wrapped.
25 |     """
26 | 
27 |     def __init__(self, distance_function: DistanceFunc):
28 |         self.distance_function = distance_function
29 | 
30 |     def __call__(self, a, b):
31 |         # Access x, which is stored in a tuple (x, y)
32 |         return self.distance_function(a[0], b[0])
33 | 
34 | 
35 | class BaseNN(base.Estimator, abc.ABC):
36 |     def __init__(self, dist_func: DistanceFunc | FunctionWrapper):
37 |         self.dist_func = dist_func
38 | 
39 |     @abc.abstractmethod
40 |     def append(self, item: typing.Any, **kwargs) -> None:
41 |         pass
42 | 
43 |     @abc.abstractmethod
44 |     def search(self, item: typing.Any, n_neighbors: int, **kwargs) -> tuple[list, list]:
45 |         pass
46 | 


--------------------------------------------------------------------------------
/river/neural_net/__init__.py:
--------------------------------------------------------------------------------
1 | """Neural networks."""
2 | 
3 | from __future__ import annotations
4 | 
5 | from . import activations
6 | from .mlp import MLPRegressor
7 | 
8 | __all__ = ["activations", "MLPRegressor"]
9 | 


--------------------------------------------------------------------------------
/river/optim/__init__.py:
--------------------------------------------------------------------------------
 1 | """Stochastic optimization."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from . import base, initializers, losses, schedulers
 6 | from .ada_bound import AdaBound
 7 | from .ada_delta import AdaDelta
 8 | from .ada_grad import AdaGrad
 9 | from .ada_max import AdaMax
10 | from .adam import Adam
11 | from .ams_grad import AMSGrad
12 | from .average import Averager
13 | from .ftrl import FTRLProximal
14 | from .momentum import Momentum
15 | from .nadam import Nadam
16 | from .nesterov import NesterovMomentum
17 | from .rms_prop import RMSProp
18 | from .sgd import SGD
19 | 
20 | __all__ = [
21 |     "base",
22 |     "AdaBound",
23 |     "AdaDelta",
24 |     "AdaGrad",
25 |     "Adam",
26 |     "AMSGrad",
27 |     "AdaMax",
28 |     "Averager",
29 |     "FTRLProximal",
30 |     "initializers",
31 |     "losses",
32 |     "Momentum",
33 |     "Nadam",
34 |     "NesterovMomentum",
35 |     "Optimizer",
36 |     "RMSProp",
37 |     "schedulers",
38 |     "SGD",
39 | ]
40 | 


--------------------------------------------------------------------------------
/river/optim/ada_grad.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import collections
 4 | 
 5 | from river import optim
 6 | 
 7 | __all__ = ["AdaGrad"]
 8 | 
 9 | 
10 | class AdaGrad(optim.base.Optimizer):
11 |     """AdaGrad optimizer.
12 | 
13 |     Parameters
14 |     ----------
15 |     lr
16 |     eps
17 | 
18 |     Attributes
19 |     ----------
20 |     g2 : collections.defaultdict
21 | 
22 |     Examples
23 |     --------
24 | 
25 |     >>> from river import datasets
26 |     >>> from river import evaluate
27 |     >>> from river import linear_model
28 |     >>> from river import metrics
29 |     >>> from river import optim
30 |     >>> from river import preprocessing
31 | 
32 |     >>> dataset = datasets.Phishing()
33 |     >>> optimizer = optim.AdaGrad()
34 |     >>> model = (
35 |     ...     preprocessing.StandardScaler() |
36 |     ...     linear_model.LogisticRegression(optimizer)
37 |     ... )
38 |     >>> metric = metrics.F1()
39 | 
40 |     >>> evaluate.progressive_val_score(dataset, model, metric)
41 |     F1: 88.01%
42 | 
43 |     References
44 |     ----------
45 |     [^1]: [Duchi, J., Hazan, E. and Singer, Y., 2011. Adaptive subgradient methods for online learning and stochastic optimization. Journal of machine learning research, 12(Jul), pp.2121-2159.](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
46 | 
47 |     """
48 | 
49 |     def __init__(self, lr=0.1, eps=1e-8):
50 |         super().__init__(lr)
51 |         self.eps = eps
52 |         self.g2 = collections.defaultdict(float)
53 | 
54 |     def _step_with_dict(self, w, g):
55 |         for i, gi in g.items():
56 |             self.g2[i] += gi**2
57 |             w[i] -= self.learning_rate / (self.g2[i] + self.eps) ** 0.5 * gi
58 | 
59 |         return w
60 | 


--------------------------------------------------------------------------------
/river/optim/momentum.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import collections
 4 | 
 5 | from river import optim
 6 | 
 7 | __all__ = ["Momentum"]
 8 | 
 9 | 
10 | class Momentum(optim.base.Optimizer):
11 |     """Momentum optimizer.
12 | 
13 |     Parameters
14 |     ----------
15 |     lr
16 |     rho
17 | 
18 |     Examples
19 |     --------
20 | 
21 |     >>> from river import datasets
22 |     >>> from river import evaluate
23 |     >>> from river import linear_model
24 |     >>> from river import metrics
25 |     >>> from river import optim
26 |     >>> from river import preprocessing
27 | 
28 |     >>> dataset = datasets.Phishing()
29 |     >>> optimizer = optim.Momentum()
30 |     >>> model = (
31 |     ...     preprocessing.StandardScaler() |
32 |     ...     linear_model.LogisticRegression(optimizer)
33 |     ... )
34 |     >>> metric = metrics.F1()
35 | 
36 |     >>> evaluate.progressive_val_score(dataset, model, metric)
37 |     F1: 84.09%
38 | 
39 |     """
40 | 
41 |     def __init__(self, lr=0.1, rho=0.9):
42 |         super().__init__(lr)
43 |         self.rho = rho
44 |         self.s = collections.defaultdict(float)
45 | 
46 |     def _step_with_dict(self, w, g):
47 |         for i, gi in g.items():
48 |             self.s[i] = self.rho * self.s[i] + self.learning_rate * gi
49 |             w[i] -= self.s[i]
50 | 
51 |         return w
52 | 


--------------------------------------------------------------------------------
/river/optim/nesterov.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import collections
 4 | 
 5 | from river import optim
 6 | 
 7 | __all__ = ["NesterovMomentum"]
 8 | 
 9 | 
10 | class NesterovMomentum(optim.base.Optimizer):
11 |     """Nesterov Momentum optimizer.
12 | 
13 |     Parameters
14 |     ----------
15 |     lr
16 |     rho
17 | 
18 |     Examples
19 |     --------
20 | 
21 |     >>> from river import datasets
22 |     >>> from river import evaluate
23 |     >>> from river import linear_model
24 |     >>> from river import metrics
25 |     >>> from river import optim
26 |     >>> from river import preprocessing
27 | 
28 |     >>> dataset = datasets.Phishing()
29 |     >>> optimizer = optim.NesterovMomentum()
30 |     >>> model = (
31 |     ...     preprocessing.StandardScaler() |
32 |     ...     linear_model.LogisticRegression(optimizer)
33 |     ... )
34 |     >>> metric = metrics.F1()
35 | 
36 |     >>> evaluate.progressive_val_score(dataset, model, metric)
37 |     F1: 84.22%
38 | 
39 |     """
40 | 
41 |     def __init__(self, lr=0.1, rho=0.9):
42 |         super().__init__(lr)
43 |         self.rho = rho
44 |         self.s = collections.defaultdict(float)
45 | 
46 |     def look_ahead(self, w):
47 |         for i in w:
48 |             w[i] -= self.rho * self.s[i]
49 | 
50 |         return w
51 | 
52 |     def _step_with_dict(self, w, g):
53 |         # Move w back to it's initial position
54 |         for i in w:
55 |             w[i] += self.rho * self.s[i]
56 | 
57 |         for i, gi in g.items():
58 |             self.s[i] = self.rho * self.s[i] + self.learning_rate * gi
59 |             w[i] -= self.s[i]
60 | 
61 |         return w
62 | 


--------------------------------------------------------------------------------
/river/optim/sgd.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import optim
 4 | 
 5 | __all__ = ["SGD"]
 6 | 
 7 | 
 8 | class SGD(optim.base.Optimizer):
 9 |     """Plain stochastic gradient descent.
10 | 
11 |     Parameters
12 |     ----------
13 |     lr
14 | 
15 |     Examples
16 |     --------
17 | 
18 |     >>> from river import datasets
19 |     >>> from river import evaluate
20 |     >>> from river import linear_model
21 |     >>> from river import metrics
22 |     >>> from river import optim
23 |     >>> from river import preprocessing
24 | 
25 |     >>> dataset = datasets.Phishing()
26 |     >>> optimizer = optim.SGD(0.1)
27 |     >>> model = (
28 |     ...     preprocessing.StandardScaler() |
29 |     ...     linear_model.LogisticRegression(optimizer)
30 |     ... )
31 |     >>> metric = metrics.F1()
32 | 
33 |     >>> evaluate.progressive_val_score(dataset, model, metric)
34 |     F1: 87.85%
35 | 
36 |     References
37 |     ----------
38 |     [^1]: [Robbins, H. and Monro, S., 1951. A stochastic approximation method. The annals of mathematical statistics, pp.400-407](https://pdfs.semanticscholar.org/34dd/d8865569c2c32dec9bf7ffc817ff42faaa01.pdf)
39 | 
40 |     """
41 | 
42 |     def __init__(self, lr=0.01) -> None:
43 |         super().__init__(lr)
44 | 
45 |     def _step_with_dict(self, w, g):
46 |         for i, gi in g.items():
47 |             w[i] -= self.learning_rate * gi
48 |         return w
49 | 
50 |     def _step_with_vector(self, w, g):
51 |         w -= self.learning_rate * g
52 |         return w
53 | 


--------------------------------------------------------------------------------
/river/preprocessing/__init__.py:
--------------------------------------------------------------------------------
 1 | """Feature preprocessing.
 2 | 
 3 | The purpose of this module is to modify an existing set of features so that they can be processed
 4 | by a machine learning algorithm. This may be done by scaling numeric parts of the data or by
 5 | one-hot encoding categorical features. The difference with the `feature_extraction` module is that
 6 | the latter extracts new information from the data
 7 | 
 8 | """
 9 | 
10 | from __future__ import annotations
11 | 
12 | from .feature_hasher import FeatureHasher
13 | from .impute import PreviousImputer, StatImputer
14 | from .lda import LDA
15 | from .one_hot import OneHotEncoder
16 | from .ordinal import OrdinalEncoder
17 | from .pred_clipper import PredClipper
18 | from .random_projection import GaussianRandomProjector, SparseRandomProjector
19 | from .scale import (
20 |     AdaptiveStandardScaler,
21 |     Binarizer,
22 |     MaxAbsScaler,
23 |     MinMaxScaler,
24 |     Normalizer,
25 |     RobustScaler,
26 |     StandardScaler,
27 | )
28 | from .scale_target import TargetMinMaxScaler, TargetStandardScaler
29 | 
30 | __all__ = [
31 |     "AdaptiveStandardScaler",
32 |     "Binarizer",
33 |     "FeatureHasher",
34 |     "GaussianRandomProjector",
35 |     "LDA",
36 |     "MaxAbsScaler",
37 |     "MinMaxScaler",
38 |     "Normalizer",
39 |     "OneHotEncoder",
40 |     "OrdinalEncoder",
41 |     "PredClipper",
42 |     "PreviousImputer",
43 |     "RobustScaler",
44 |     "SparseRandomProjector",
45 |     "StandardScaler",
46 |     "StatImputer",
47 |     "TargetMinMaxScaler",
48 |     "TargetStandardScaler",
49 | ]
50 | 


--------------------------------------------------------------------------------
/river/preprocessing/test_random_projection.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import numpy as np
 4 | 
 5 | from river import datasets, preprocessing
 6 | 
 7 | 
 8 | def test_gaussian_random_projector_dot_product():
 9 |     dataset = datasets.TrumpApproval()
10 |     projector = preprocessing.GaussianRandomProjector(n_components=3)
11 | 
12 |     for x, y in dataset:
13 |         y = projector.transform_one(x)
14 |         y_arr = np.array(list(y.values()))
15 |         x_arr = np.array(list(x.values()))
16 |         P = np.array(
17 |             [[projector._projection_matrix[i, j] for j in x] for i in range(projector.n_components)]
18 |         )
19 |         np.testing.assert_allclose(x_arr @ P.T, y_arr)
20 | 
21 | 
22 | def test_sparse_random_projector_dot_product():
23 |     dataset = datasets.TrumpApproval()
24 |     projector = preprocessing.SparseRandomProjector(n_components=3, density=0.5)
25 | 
26 |     for x, y in dataset:
27 |         y = projector.transform_one(x)
28 |         y_arr = np.array(list(y.values()))
29 |         x_arr = np.array(list(x.values()))
30 |         P = np.array(
31 |             [
32 |                 [projector._projection_matrix[j].get(i, 0) for j in x]
33 |                 for i in range(projector.n_components)
34 |             ]
35 |         )
36 |         np.testing.assert_allclose(x_arr @ P.T, y_arr)
37 | 
38 | 
39 | def test_sparse_random_projector_size():
40 |     dataset = datasets.TrumpApproval()
41 |     projector = preprocessing.SparseRandomProjector(n_components=3, density=0.5)
42 | 
43 |     for x, y in dataset:
44 |         projector.transform_one(x)
45 |         break
46 | 
47 |     n_weights = sum(len(v) for v in projector._projection_matrix.values())
48 |     assert n_weights < len(x) * projector.n_components
49 | 


--------------------------------------------------------------------------------
/river/proba/__init__.py:
--------------------------------------------------------------------------------
 1 | """Probability distributions."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from . import base
 6 | from .beta import Beta
 7 | from .gaussian import Gaussian, MultivariateGaussian
 8 | from .multinomial import Multinomial
 9 | 
10 | __all__ = ["base", "Beta", "Gaussian", "Multinomial", "MultivariateGaussian"]
11 | 


--------------------------------------------------------------------------------
/river/proba/test_gaussian.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | 
 5 | import numpy as np
 6 | import pandas as pd
 7 | import pytest
 8 | 
 9 | from river import proba
10 | 
11 | 
12 | @pytest.mark.parametrize(
13 |     "p",
14 |     [
15 |         pytest.param(
16 |             p,
17 |             id=f"{p=}",
18 |         )
19 |         for p in [1, 3, 5]
20 |     ],
21 | )
22 | def test_univariate_multivariate_consistency(p):
23 |     X = pd.DataFrame(np.random.random((30, p)), columns=range(p))
24 | 
25 |     multi = proba.MultivariateGaussian()
26 |     single = {c: proba.Gaussian() for c in X.columns}
27 | 
28 |     for x in X.to_dict(orient="records"):
29 |         multi.update(x)
30 |         for c, s in single.items():
31 |             s.update(x[c])
32 | 
33 |     for c in X.columns:
34 |         assert math.isclose(multi.mu[c], single[c].mu)
35 |         assert math.isclose(multi.sigma[c][c], single[c].sigma)
36 | 


--------------------------------------------------------------------------------
/river/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/river/py.typed


--------------------------------------------------------------------------------
/river/reco/__init__.py:
--------------------------------------------------------------------------------
 1 | """Recommender systems module.
 2 | 
 3 | Recommender systems (recsys for short) is a large topic. This module is far from comprehensive. It
 4 | simply provides models which can contribute towards building a recommender system.
 5 | 
 6 | A typical recommender system is made up of a retrieval phase, followed by a ranking phase. The
 7 | output of the retrieval phase is a shortlist of the catalogue of items. The items in the shortlist
 8 | are then usually ranked according to the expected preference the user will have for each item. This
 9 | module focuses on the ranking phase.
10 | 
11 | Models which inherit from the `Ranker` class have a `rank` method. This allows sorting a set of
12 | items for a given user. Each model also has a `learn_one(user, item, y, context)` which allows
13 | learning user preferences. The `y` parameter is a reward value, the nature of which depends is
14 | specific to each and every recommendation task. Typically the reward is a number or a boolean
15 | value. It is up to the user to determine how to translate a user session into training data.
16 | 
17 | """
18 | 
19 | from __future__ import annotations
20 | 
21 | from . import base
22 | from .baseline import Baseline
23 | from .biased_mf import BiasedMF
24 | from .funk_mf import FunkMF
25 | from .normal import RandomNormal
26 | 
27 | __all__ = ["base", "Baseline", "BiasedMF", "FunkMF", "RandomNormal"]
28 | 


--------------------------------------------------------------------------------
/river/rules/__init__.py:
--------------------------------------------------------------------------------
1 | """Decision rules-based algorithms."""
2 | 
3 | from __future__ import annotations
4 | 
5 | from .amrules import AMRules
6 | 
7 | __all__ = ["AMRules"]
8 | 


--------------------------------------------------------------------------------
/river/sketch/__init__.py:
--------------------------------------------------------------------------------
 1 | """Data containers and collections for sequential data.
 2 | 
 3 | This module has summary and sketch structures that operate with constrained amounts
 4 | of memory and processing time.
 5 | 
 6 | """
 7 | 
 8 | from __future__ import annotations
 9 | 
10 | from .counter import Counter
11 | from .heavy_hitters import HeavyHitters
12 | from .histogram import Histogram
13 | from .set import Set
14 | 
15 | __all__ = ["Counter", "HeavyHitters", "Histogram", "Set"]
16 | 


--------------------------------------------------------------------------------
/river/stats/__init__.py:
--------------------------------------------------------------------------------
 1 | """Running statistics"""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from . import base
 6 | from .auto_corr import AutoCorr
 7 | from .count import Count
 8 | from .cov import Cov
 9 | from .entropy import Entropy
10 | from .ewmean import EWMean
11 | from .ewvar import EWVar
12 | from .iqr import IQR, RollingIQR
13 | from .kolmogorov_smirnov import KolmogorovSmirnov
14 | from .kurtosis import Kurtosis
15 | from .link import Link
16 | from .mad import MAD
17 | from .maximum import AbsMax, Max, RollingAbsMax, RollingMax
18 | from .mean import BayesianMean, Mean
19 | from .minimum import Min, RollingMin
20 | from .mode import Mode, RollingMode
21 | from .n_unique import NUnique
22 | from .pearson import PearsonCorr
23 | from .ptp import PeakToPeak, RollingPeakToPeak
24 | from .quantile import Quantile, RollingQuantile
25 | from .sem import SEM
26 | from .shift import Shift
27 | from .skew import Skew
28 | from .summing import Sum
29 | from .var import Var
30 | 
31 | __all__ = [
32 |     "base",
33 |     "AbsMax",
34 |     "AutoCorr",
35 |     "BayesianMean",
36 |     "Count",
37 |     "Cov",
38 |     "Entropy",
39 |     "EWMean",
40 |     "EWVar",
41 |     "IQR",
42 |     "KolmogorovSmirnov",
43 |     "Kurtosis",
44 |     "Link",
45 |     "MAD",
46 |     "Max",
47 |     "Mean",
48 |     "Min",
49 |     "Mode",
50 |     "NUnique",
51 |     "PeakToPeak",
52 |     "PearsonCorr",
53 |     "Quantile",
54 |     "RollingAbsMax",
55 |     "RollingIQR",
56 |     "RollingMax",
57 |     "RollingMin",
58 |     "RollingMode",
59 |     "RollingPeakToPeak",
60 |     "RollingQuantile",
61 |     "SEM",
62 |     "Shift",
63 |     "Skew",
64 |     "Sum",
65 |     "Var",
66 | ]
67 | 


--------------------------------------------------------------------------------
/river/stats/_rust_stats.pyi:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | class RsQuantile:
 4 |     def __init__(self, q: float): ...
 5 |     def update(self, x: float): ...
 6 |     def get(self) -> float: ...
 7 | 
 8 | class RsEWMean:
 9 |     def __init__(self, alpha: float): ...
10 |     def update(self, x: float): ...
11 |     def get(self) -> float: ...
12 | 
13 | class RsEWVar:
14 |     def __init__(self, alpha: float): ...
15 |     def update(self, x: float): ...
16 |     def get(self) -> float: ...
17 | 
18 | class RsIQR:
19 |     def __init__(self, q_inf: float, q_sup: float): ...
20 |     def update(self, x: float): ...
21 |     def get(self) -> float: ...
22 | 
23 | class RsKurtosis:
24 |     def __init__(self, bias: bool): ...
25 |     def update(self, x: float): ...
26 |     def get(self) -> float: ...
27 | 
28 | class RsPeakToPeak:
29 |     def __init__(self): ...
30 |     def update(self, x: float): ...
31 |     def get(self) -> float: ...
32 | 
33 | class RsSkew:
34 |     def __init__(self, bias: float): ...
35 |     def update(self, x: float): ...
36 |     def get(self) -> float: ...
37 | 
38 | class RsRollingQuantile:
39 |     def __init__(self, q: float, window_size: int): ...
40 |     def update(self, x: float): ...
41 |     def get(self) -> float: ...
42 | 
43 | class RsRollingIQR:
44 |     def __init__(self, q_inf: float, q_sup: float, window_size: int): ...
45 |     def update(self, x: float): ...
46 |     def get(self) -> float: ...
47 | 


--------------------------------------------------------------------------------
/river/stats/count.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stats
 4 | 
 5 | 
 6 | class Count(stats.base.Univariate):
 7 |     """A simple counter.
 8 | 
 9 |     Attributes
10 |     ----------
11 |     n : int
12 |         The current number of observations.
13 | 
14 |     """
15 | 
16 |     def __init__(self):
17 |         self.n = 0
18 | 
19 |     def update(self, x=None):
20 |         self.n += 1
21 | 
22 |     def get(self):
23 |         return self.n
24 | 


--------------------------------------------------------------------------------
/river/stats/mad.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from . import quantile
 4 | 
 5 | 
 6 | class MAD(quantile.Quantile):
 7 |     """Median Absolute Deviation (MAD).
 8 | 
 9 |     The median absolute deviation is the median of the absolute differences between each data point
10 |     and the data's overall median. In an online setting, the median of the data is unknown
11 |     beforehand. Therefore, both the median of the data and the median of the differences of the
12 |     data with respect to the latter are updated online. To be precise, the median of the data is
13 |     updated before the median of the differences. As a consequence, this online version of the MAD
14 |     does not coincide exactly with its batch counterpart.
15 | 
16 |     Examples
17 |     --------
18 | 
19 |     >>> from river import stats
20 | 
21 |     >>> X = [4, 2, 5, 3, 0, 4]
22 | 
23 |     >>> mad = stats.MAD()
24 |     >>> for x in X:
25 |     ...     mad.update(x)
26 |     ...     print(mad.get())
27 |     0.0
28 |     2.0
29 |     1.0
30 |     1.0
31 |     1.0
32 |     1.0
33 | 
34 |     Attributes
35 |     ----------
36 |     median : stats.Median
37 |         The median of the data.
38 | 
39 |     References
40 |     ----------
41 |     [^1]: [Median absolute deviation article on Wikipedia](https://www.wikiwand.com/en/Median_absolute_deviation)
42 | 
43 |     """
44 | 
45 |     #
46 |     def __init__(self):
47 |         super().__init__(q=0.5)
48 |         self.median = quantile.Quantile(q=0.5)
49 | 
50 |     def update(self, x):
51 |         self.median.update(x)
52 |         super().update(abs(x - self.median.get()))
53 | 


--------------------------------------------------------------------------------
/river/stats/minimum.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | 
 5 | from river import stats, utils
 6 | 
 7 | 
 8 | class Min(stats.base.Univariate):
 9 |     """Running min.
10 | 
11 |     Attributes
12 |     ----------
13 |     min : float
14 |         The current min.
15 | 
16 |     """
17 | 
18 |     def __init__(self):
19 |         self.min = math.inf
20 | 
21 |     def update(self, x):
22 |         if x < self.min:
23 |             self.min = x
24 | 
25 |     def get(self):
26 |         return self.min
27 | 
28 | 
29 | class RollingMin(stats.base.RollingUnivariate):
30 |     """Running min over a window.
31 | 
32 |     Parameters
33 |     ----------
34 |     window_size
35 |         Size of the rolling window.
36 | 
37 |     Examples
38 |     --------
39 | 
40 |     >>> from river import stats
41 | 
42 |     >>> X = [1, -4, 3, -2, 2, 1]
43 |     >>> rolling_min = stats.RollingMin(2)
44 |     >>> for x in X:
45 |     ...     rolling_min.update(x)
46 |     ...     print(rolling_min.get())
47 |     1
48 |     -4
49 |     -4
50 |     -2
51 |     -2
52 |     1
53 | 
54 |     """
55 | 
56 |     def __init__(self, window_size: int):
57 |         self.window = utils.SortedWindow(size=window_size)
58 | 
59 |     @property
60 |     def window_size(self):
61 |         return self.window.size
62 | 
63 |     def update(self, x):
64 |         self.window.append(x)
65 | 
66 |     def get(self):
67 |         try:
68 |             return self.window[0]
69 |         except IndexError:
70 |             return None
71 | 


--------------------------------------------------------------------------------
/river/stats/sem.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from . import var
 4 | 
 5 | 
 6 | class SEM(var.Var):
 7 |     """Running standard error of the mean using Welford's algorithm.
 8 | 
 9 |     Parameters
10 |     ----------
11 |     ddof
12 |         Delta Degrees of Freedom. The divisor used in calculations is `n - ddof`, where `n` is the
13 |         number of seen elements.
14 | 
15 |     Attributes
16 |     ----------
17 |     n : int
18 |         Number of observations.
19 | 
20 |     Examples
21 |     --------
22 | 
23 |     >>> from river import stats
24 | 
25 |     >>> X = [3, 5, 4, 7, 10, 12]
26 | 
27 |     >>> sem = stats.SEM()
28 |     >>> for x in X:
29 |     ...     sem.update(x)
30 |     ...     print(sem.get())
31 |     0.0
32 |     1.0
33 |     0.577350
34 |     0.853912
35 |     1.240967
36 |     1.447219
37 | 
38 |     >>> from river import utils
39 | 
40 |     >>> X = [1, 4, 2, -4, -8, 0]
41 | 
42 |     >>> rolling_sem = utils.Rolling(stats.SEM(ddof=1), window_size=3)
43 |     >>> for x in X:
44 |     ...     rolling_sem.update(x)
45 |     ...     print(rolling_sem.get())
46 |     0.0
47 |     1.5
48 |     0.881917
49 |     2.403700
50 |     2.905932
51 |     2.309401
52 | 
53 |     References
54 |     ----------
55 |     [^1]: [Wikipedia article on algorithms for calculating variance](https://www.wikiwand.com/en/Algorithms_for_calculating_variance#/Covariance)
56 | 
57 |     """
58 | 
59 |     def get(self):
60 |         try:
61 |             return (super().get() / self.mean.n) ** 0.5
62 |         except ZeroDivisionError:
63 |             return None
64 | 


--------------------------------------------------------------------------------
/river/stats/summing.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import stats
 4 | 
 5 | 
 6 | class Sum(stats.base.Univariate):
 7 |     """Running sum.
 8 | 
 9 |     Attributes
10 |     ----------
11 |     sum : float
12 |         The running sum.
13 | 
14 |     Examples
15 |     --------
16 | 
17 |     >>> from river import stats
18 | 
19 |     >>> X = [-5, -3, -1, 1, 3, 5]
20 |     >>> mean = stats.Sum()
21 |     >>> for x in X:
22 |     ...     mean.update(x)
23 |     ...     print(mean.get())
24 |     -5.0
25 |     -8.0
26 |     -9.0
27 |     -8.0
28 |     -5.0
29 |     0.0
30 | 
31 |     >>> from river import utils
32 | 
33 |     >>> X = [1, -4, 3, -2, 2, 1]
34 |     >>> rolling_sum = utils.Rolling(stats.Sum(), window_size=2)
35 |     >>> for x in X:
36 |     ...     rolling_sum.update(x)
37 |     ...     print(rolling_sum.get())
38 |     1.0
39 |     -3.0
40 |     -1.0
41 |     1.0
42 |     0.0
43 |     3.0
44 | 
45 |     """
46 | 
47 |     def __init__(self):
48 |         self.sum = 0.0
49 | 
50 |     def update(self, x):
51 |         self.sum += x
52 | 
53 |     def revert(self, x):
54 |         self.sum -= x
55 | 
56 |     def get(self):
57 |         return self.sum
58 | 


--------------------------------------------------------------------------------
/river/stats/test_kolmogorov_smirnov.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from collections import deque
 4 | 
 5 | import numpy as np
 6 | from scipy.stats import ks_2samp
 7 | 
 8 | from river import stats
 9 | 
10 | 
11 | def test_incremental_ks_statistics():
12 |     initial_a = np.random.normal(loc=0, scale=1, size=500)
13 |     initial_b = np.random.normal(loc=1, scale=1, size=500)
14 | 
15 |     stream_a = np.random.normal(loc=0, scale=1, size=5000)
16 |     stream_b = np.random.normal(loc=1, scale=1, size=5000)
17 | 
18 |     incremental_ks_statistics = []
19 |     incremental_ks = stats.KolmogorovSmirnov(statistic="ks")
20 |     sliding_a = deque(initial_a)
21 |     sliding_b = deque(initial_b)
22 | 
23 |     for a, b in zip(initial_a, initial_b):
24 |         incremental_ks.update(a, b)
25 |     for a, b in zip(stream_a, stream_b):
26 |         incremental_ks.revert(sliding_a.popleft(), sliding_b.popleft())
27 |         sliding_a.append(a)
28 |         sliding_b.append(b)
29 |         incremental_ks.update(a, b)
30 |         incremental_ks_statistics.append(incremental_ks.get())
31 | 
32 |     ks_2samp_statistics = []
33 |     sliding_a = deque(initial_a)
34 |     sliding_b = deque(initial_b)
35 | 
36 |     for a, b in zip(stream_a, stream_b):
37 |         sliding_a.popleft()
38 |         sliding_b.popleft()
39 |         sliding_a.append(a)
40 |         sliding_b.append(b)
41 |         ks_2samp_statistics.append(ks_2samp(sliding_a, sliding_b).statistic)
42 | 
43 |     assert np.allclose(np.array(incremental_ks_statistics), np.array(ks_2samp_statistics))
44 | 
45 |     assert incremental_ks._test_ks_threshold(ca=incremental_ks._ca(p_value=0.05)) is True
46 | 


--------------------------------------------------------------------------------
/river/stats/test_quantile.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import random
 4 | 
 5 | from river import stats
 6 | 
 7 | 
 8 | def test_issue_1178():
 9 |     """
10 | 
11 |     https://github.com/online-ml/river/issues/1178
12 | 
13 |     >>> from river import stats
14 | 
15 |     >>> q = stats.Quantile(0.01)
16 |     >>> for x in [5, 0, 0, 0, 0, 0, 0, 0]:
17 |     ...     q.update(x)
18 |     ...     print(q)
19 |     Quantile: 5.
20 |     Quantile: 0.
21 |     Quantile: 0.
22 |     Quantile: 0.
23 |     Quantile: 0.
24 |     Quantile: 0.
25 |     Quantile: 0.
26 |     Quantile: 0.
27 | 
28 |     >>> q = stats.Quantile(0.99)
29 |     >>> for x in [5, 0, 0, 0, 0, 0, 0, 0]:
30 |     ...     q.update(x)
31 |     ...     print(q)
32 |     Quantile: 5.
33 |     Quantile: 5.
34 |     Quantile: 5.
35 |     Quantile: 5.
36 |     Quantile: 5.
37 |     Quantile: 0.
38 |     Quantile: 0.277778
39 |     Quantile: 0.827546
40 | 
41 |     """
42 | 
43 | 
44 | def test_ge():
45 |     low = stats.Quantile(0.01)
46 |     high = stats.Quantile(0.99)
47 | 
48 |     for _ in range(100):
49 |         x = random.random()
50 |         low.update(x)
51 |         high.update(x)
52 |         assert high.get() >= low.get()
53 | 


--------------------------------------------------------------------------------
/river/stats/test_var.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | import random
 5 | 
 6 | from river import stats
 7 | 
 8 | 
 9 | def test_weighted_variance_with_close_numbers():
10 |     """
11 | 
12 |     Origin of this test: https://github.com/online-ml/river/issues/732
13 | 
14 |     This test would fail if Var were implemented with a numerically unstable algorithm.
15 | 
16 |     """
17 | 
18 |     D = [
19 |         (99.99999978143265, 6),
20 |         (99.99999989071631, 8),
21 |         (99.99999994535816, 6),
22 |         (99.99999997267908, 9),
23 |         (99.99999998633952, 10),
24 |         (99.99999999316977, 3),
25 |         (99.99999999829245, 5),
26 |         (99.99999999957309, 9),
27 |     ]
28 | 
29 |     var = stats.Var()
30 | 
31 |     for x, w in D:
32 |         var.update(x, w)
33 | 
34 |     assert var.get() > 0 and math.isclose(var.get(), 4.648047194845607e-15)
35 | 
36 | 
37 | def test_revert():
38 |     for _ in range(5):
39 |         X = [random.random() for _ in range(20)]
40 | 
41 |         v1 = stats.Var()
42 |         v2 = stats.Var()
43 | 
44 |         for x in X[:10]:
45 |             v1.update(x)
46 |             v2.update(x)
47 | 
48 |         for x in X[10:]:
49 |             v2.update(x)
50 |         for x in X[10:]:
51 |             v2.revert(x)
52 | 
53 |         assert math.isclose(v1.get(), v2.get())
54 | 


--------------------------------------------------------------------------------
/river/stream/__init__.py:
--------------------------------------------------------------------------------
 1 | """Streaming utilities.
 2 | 
 3 | The module includes tools to iterate over data streams.
 4 | 
 5 | """
 6 | 
 7 | from __future__ import annotations
 8 | 
 9 | from .cache import Cache
10 | from .iter_arff import iter_arff
11 | from .iter_array import iter_array
12 | from .iter_csv import iter_csv
13 | from .iter_libsvm import iter_libsvm
14 | from .qa import simulate_qa
15 | from .shuffling import shuffle
16 | from .tweet_stream import TwitterLiveStream
17 | from .twitch_chat_stream import TwitchChatStream
18 | 
19 | __all__ = [
20 |     "Cache",
21 |     "iter_arff",
22 |     "iter_array",
23 |     "iter_csv",
24 |     "iter_libsvm",
25 |     "simulate_qa",
26 |     "shuffle",
27 |     "TwitterLiveStream",
28 |     "TwitchChatStream",
29 | ]
30 | 
31 | try:
32 |     from .iter_polars import iter_polars
33 | 
34 |     __all__ += ["iter_polars"]
35 | except ImportError:
36 |     pass
37 | 
38 | try:
39 |     from .iter_pandas import iter_pandas
40 | 
41 |     __all__ += ["iter_pandas"]
42 | except ImportError:
43 |     pass
44 | 
45 | try:
46 |     from .iter_sklearn import iter_sklearn_dataset
47 | 
48 |     __all__ += ["iter_sklearn_dataset"]
49 | except ImportError:
50 |     pass
51 | 
52 | try:
53 |     from .iter_sql import iter_sql
54 | 
55 |     __all__ += ["iter_sql"]
56 | except ImportError:
57 |     pass
58 | 
59 | try:
60 |     from .iter_vaex import iter_vaex
61 | 
62 |     __all__ += ["iter_vaex"]
63 | except ImportError:
64 |     pass
65 | 


--------------------------------------------------------------------------------
/river/stream/iter_pandas.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import pandas as pd
 4 | 
 5 | from river import base, stream
 6 | 
 7 | 
 8 | def iter_pandas(
 9 |     X: pd.DataFrame, y: pd.Series | pd.DataFrame | None = None, **kwargs
10 | ) -> base.typing.Stream:
11 |     """Iterates over the rows of a `pandas.DataFrame`.
12 | 
13 |     Parameters
14 |     ----------
15 |     X
16 |         A dataframe of features.
17 |     y
18 |         A series or a dataframe with one column per target.
19 |     kwargs
20 |         Extra keyword arguments are passed to the underlying call to `stream.iter_array`.
21 | 
22 |     Examples
23 |     --------
24 | 
25 |     >>> import pandas as pd
26 |     >>> from river import stream
27 | 
28 |     >>> X = pd.DataFrame({
29 |     ...     'x1': [1, 2, 3, 4],
30 |     ...     'x2': ['blue', 'yellow', 'yellow', 'blue'],
31 |     ...     'y': [True, False, False, True]
32 |     ... })
33 |     >>> y = X.pop('y')
34 | 
35 |     >>> for xi, yi in stream.iter_pandas(X, y):
36 |     ...     print(xi, yi)
37 |     {'x1': 1, 'x2': 'blue'} True
38 |     {'x1': 2, 'x2': 'yellow'} False
39 |     {'x1': 3, 'x2': 'yellow'} False
40 |     {'x1': 4, 'x2': 'blue'} True
41 | 
42 |     """
43 | 
44 |     kwargs["feature_names"] = X.columns
45 |     if isinstance(y, pd.DataFrame):
46 |         kwargs["target_names"] = y.columns
47 | 
48 |     yield from stream.iter_array(X=X.to_numpy(), y=y if y is None else y.to_numpy(), **kwargs)
49 | 


--------------------------------------------------------------------------------
/river/stream/iter_polars.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import polars as pl
 4 | 
 5 | from river import base, stream
 6 | 
 7 | 
 8 | def iter_polars(
 9 |     X: pl.DataFrame, y: pl.Series | pl.DataFrame | None = None, **kwargs
10 | ) -> base.typing.Stream:
11 |     """Iterates over the rows of a `polars.DataFrame`.
12 | 
13 |     Parameters
14 |     ----------
15 |     X
16 |         A dataframe of features.
17 |     y
18 |         A series or a dataframe with one column per target.
19 |     kwargs
20 |         Extra keyword arguments are passed to the underlying call to `stream.iter_array`.
21 | 
22 |     Examples
23 |     --------
24 | 
25 |     >>> import polars as pl
26 |     >>> from river import stream
27 | 
28 |     >>> X = pl.DataFrame({
29 |     ...     'x1': [1, 2, 3, 4],
30 |     ...     'x2': ['blue', 'yellow', 'yellow', 'blue'],
31 |     ...     'y': [True, False, False, True]
32 |     ... })
33 |     >>> y = X.get_column('y')
34 |     >>> X=X.drop("y")
35 | 
36 |     >>> for xi, yi in stream.iter_polars(X, y):
37 |     ...     print(xi, yi)
38 |     {'x1': 1, 'x2': 'blue'} True
39 |     {'x1': 2, 'x2': 'yellow'} False
40 |     {'x1': 3, 'x2': 'yellow'} False
41 |     {'x1': 4, 'x2': 'blue'} True
42 | 
43 |     """
44 | 
45 |     kwargs["feature_names"] = X.columns
46 |     if isinstance(y, pl.DataFrame):
47 |         kwargs["target_names"] = y.columns
48 | 
49 |     yield from stream.iter_array(X=X.to_numpy(), y=y if y is None else y.to_numpy(), **kwargs)
50 | 


--------------------------------------------------------------------------------
/river/stream/iter_vaex.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import vaex
 4 | from vaex.utils import _ensure_list, _ensure_strings_from_expressions
 5 | 
 6 | from river import base
 7 | 
 8 | 
 9 | def iter_vaex(
10 |     X: vaex.dataframe.DataFrame,
11 |     y: str | vaex.expression.Expression | None = None,
12 |     features: list[str] | vaex.expression.Expression | None = None,
13 | ) -> base.typing.Stream:
14 |     """Yields rows from a ``vaex.DataFrame``.
15 | 
16 |     Parameters
17 |     ----------
18 |     X
19 |         A vaex DataFrame housing the training features.
20 |     y
21 |         The column or expression containing the target variable.
22 |     features
23 |         A list of features used for training. If None, all columns in `X` will be used. Features
24 |         specifying in `y` are ignored.
25 | 
26 |     """
27 | 
28 |     features = _ensure_strings_from_expressions(features)
29 |     feature_names = features or X.get_column_names()
30 | 
31 |     if y:
32 |         y = _ensure_strings_from_expressions(y)
33 |         y = _ensure_list(y)
34 |         feature_names = [feat for feat in feature_names if feat not in y]
35 | 
36 |     multioutput = len(y) > 1
37 | 
38 |     if multioutput:
39 |         for i in range(len(X)):
40 |             yield (
41 |                 {key: X.evaluate(key, i, i + 1)[0] for key in feature_names},
42 |                 {key: X.evaluate(key, i, i + 1)[0] for key in y},
43 |             )
44 | 
45 |     else:
46 |         for i in range(len(X)):
47 |             yield (
48 |                 {key: X.evaluate(key, i, i + 1)[0] for key in feature_names},
49 |                 X.evaluate(y[0], i, i + 1)[0],
50 |             )
51 | 


--------------------------------------------------------------------------------
/river/stream/pokedb.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/online-ml/river/9e2ceca900ba53f0ee710a6e69f972b05f74d43a/river/stream/pokedb.zip


--------------------------------------------------------------------------------
/river/stream/test_iter_csv.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import io
 4 | 
 5 | from river import stream
 6 | 
 7 | 
 8 | def test_iter_csv_custom_converter():
 9 |     example = io.StringIO("col1,col2,col3\n,1,2\n5,,4\n3,1,")
10 | 
11 |     def int_or_none(s):
12 |         try:
13 |             return int(s)
14 |         except ValueError:
15 |             return None
16 | 
17 |     params = {"converters": {"col1": int_or_none, "col2": int_or_none, "col3": int_or_none}}
18 |     dataset = stream.iter_csv(example, **params)
19 |     assert list(dataset) == [
20 |         ({"col1": None, "col2": 1, "col3": 2}, None),
21 |         ({"col1": 5, "col2": None, "col3": 4}, None),
22 |         ({"col1": 3, "col2": 1, "col3": None}, None),
23 |     ]
24 | 
25 | 
26 | def test_iter_csv_drop_nones():
27 |     example = io.StringIO("col1,col2,col3\n,1,2\n5,,4\n3,1,")
28 | 
29 |     def int_or_none(s):
30 |         try:
31 |             return int(s)
32 |         except ValueError:
33 |             return None
34 | 
35 |     params = {
36 |         "converters": {"col1": int_or_none, "col2": int_or_none, "col3": int_or_none},
37 |         "drop_nones": True,
38 |     }
39 |     dataset = stream.iter_csv(example, **params)
40 |     assert list(dataset) == [
41 |         ({"col2": 1, "col3": 2}, None),
42 |         ({"col1": 5, "col3": 4}, None),
43 |         ({"col1": 3, "col2": 1}, None),
44 |     ]
45 | 


--------------------------------------------------------------------------------
/river/stream/utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import functools
 4 | import gzip
 5 | import io
 6 | import os
 7 | import zipfile
 8 | 
 9 | 
10 | def open_filepath(filepath_or_buffer, compression):
11 |     # Determine the compression from the file extension if "infer" has been specified
12 |     if compression == "infer":
13 |         _, ext = os.path.splitext(filepath_or_buffer)
14 |         compression = {".gz": "gzip", ".zip": "zip"}.get(ext)
15 | 
16 |     def open_zipfile(path):
17 |         with zipfile.ZipFile(path, "r") as zf:
18 |             f = zf.open(zf.namelist()[0], "r")
19 |             f = io.TextIOWrapper(f)
20 |             return f
21 | 
22 |     # Determine the file opening method from the compression
23 |     open_func = {
24 |         None: open,
25 |         "gzip": functools.partial(gzip.open, mode="rt"),
26 |         "zip": open_zipfile,
27 |     }[compression]
28 | 
29 |     # Open the file using the opening method
30 |     return open_func(filepath_or_buffer)
31 | 


--------------------------------------------------------------------------------
/river/time_series/__init__.py:
--------------------------------------------------------------------------------
 1 | """Time series forecasting."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from . import base
 6 | from .evaluate import evaluate, iter_evaluate
 7 | from .holt_winters import HoltWinters
 8 | from .metrics import ForecastingMetric, HorizonAggMetric, HorizonMetric
 9 | from .snarimax import SNARIMAX
10 | 
11 | __all__ = [
12 |     "base",
13 |     "evaluate",
14 |     "iter_evaluate",
15 |     "ForecastingMetric",
16 |     "HorizonAggMetric",
17 |     "HorizonMetric",
18 |     "HoltWinters",
19 |     "SNARIMAX",
20 | ]
21 | 


--------------------------------------------------------------------------------
/river/time_series/base.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import abc
 4 | 
 5 | from river import base
 6 | 
 7 | __all__ = ["Forecaster"]
 8 | 
 9 | 
10 | class Forecaster(base.Estimator):
11 |     @property
12 |     def _supervised(self):
13 |         return True
14 | 
15 |     @abc.abstractmethod
16 |     def learn_one(self, y: float, x: dict | None = None) -> None:
17 |         """Updates the model.
18 | 
19 |         Parameters
20 |         ----------
21 |         y
22 |             In the literature this is called the endogenous variable.
23 |         x
24 |             Optional additional features to learn from. In the literature these are called the
25 |             exogenous variables.
26 | 
27 |         """
28 |         raise NotImplementedError
29 | 
30 |     @abc.abstractmethod
31 |     def forecast(self, horizon: int, xs: list[dict] | None = None) -> list:
32 |         """Makes forecast at each step of the given horizon.
33 | 
34 |         Parameters
35 |         ----------
36 |         horizon
37 |             The number of steps ahead to forecast.
38 |         xs
39 |             The set of optional additional features. If given, then it's length should be equal to
40 |             the horizon.
41 | 
42 |         """
43 | 


--------------------------------------------------------------------------------
/river/time_series/test_evaluate.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from river import datasets, metrics, stats, time_series
 4 | 
 5 | 
 6 | class MeanForecaster(time_series.base.Forecaster):
 7 |     def __init__(self):
 8 |         self.mean = stats.Mean()
 9 | 
10 |     def learn_one(self, y, x=None):
11 |         self.mean.update(y)
12 | 
13 |     def forecast(self, horizon, xs=None):
14 |         return [self.mean.get()] * horizon
15 | 
16 | 
17 | def test_forecasts_at_each_step():
18 |     dataset = datasets.AirlinePassengers()
19 |     model = MeanForecaster()
20 |     metric = metrics.MAE()
21 |     horizon = 12
22 |     grace_period = 1
23 | 
24 |     steps = time_series.iter_evaluate(
25 |         dataset=dataset, model=model, metric=metric, horizon=horizon, grace_period=grace_period
26 |     )
27 | 
28 |     _, _, y_pred, _ = next(steps)
29 |     assert y_pred == [112] * horizon
30 |     _, _, y_pred, _ = next(steps)
31 |     assert y_pred == [(112 + 118) / 2] * horizon
32 |     _, _, y_pred, _ = next(steps)
33 |     assert y_pred == [(112 + 118 + 132) / 3] * horizon
34 |     _, _, y_pred, _ = next(steps)
35 |     assert y_pred == [(112 + 118 + 132 + 129) / 4] * horizon
36 | 
37 |     n_steps = sum(
38 |         1
39 |         for _ in time_series.iter_evaluate(
40 |             dataset=dataset, model=model, metric=metric, horizon=horizon, grace_period=grace_period
41 |         )
42 |     )
43 |     assert n_steps == dataset.n_samples - horizon - grace_period
44 | 


--------------------------------------------------------------------------------
/river/tree/losses.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import abc
 4 | import math
 5 | 
 6 | from .utils import GradHess
 7 | 
 8 | 
 9 | class Loss(abc.ABC):
10 |     """Base class to implement optimization objectives used in Stochastic Gradient Trees."""
11 | 
12 |     @abc.abstractmethod
13 |     def compute_derivatives(self, y_true: float, y_pred: float) -> GradHess:
14 |         """Return the gradient and hessian data concerning one instance and its prediction.
15 | 
16 |         Parameters
17 |         ----------
18 |         y_true
19 |             Target value.
20 |         y_pred
21 |             Predicted target value.
22 |         """
23 |         raise NotImplementedError
24 | 
25 |     def transfer(self, y: float) -> float:
26 |         """Optionally apply some transformation to the value predicted by the tree before
27 |         returning it.
28 | 
29 |         For instance, in classification, the softmax operation might be applied.
30 | 
31 |         Parameters
32 |         ----------
33 |         y
34 |             Value to be transformed.
35 |         """
36 |         return y
37 | 
38 | 
39 | class BinaryCrossEntropyLoss(Loss):
40 |     """Loss function used in binary classification tasks."""
41 | 
42 |     def compute_derivatives(self, y_true, y_pred):
43 |         y_trs = self.transfer(y_pred)
44 | 
45 |         return GradHess(y_trs - y_true, y_trs * (1.0 - y_trs))
46 | 
47 |     def transfer(self, y):
48 |         return 1.0 / (1.0 + math.exp(-y))
49 | 
50 | 
51 | class SquaredErrorLoss(Loss):
52 |     """Loss function used in regression tasks."""
53 | 
54 |     def compute_derivatives(self, y_true, y_pred):
55 |         return GradHess(y_pred - y_true, 1.0)
56 | 


--------------------------------------------------------------------------------
/river/tree/mondrian/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The `river.tree.mondrian` module includes learning and split node
 3 | implementations for the Mondrian trees.
 4 | 
 5 | Note that this module is not exposed in the tree module, and is instead used by the
 6 | AMFClassifier and AMFRegressor classes in the ensemble module.
 7 | 
 8 | """
 9 | 
10 | from __future__ import annotations
11 | 
12 | from .mondrian_tree import MondrianTree
13 | from .mondrian_tree_classifier import MondrianTreeClassifier
14 | from .mondrian_tree_regressor import MondrianTreeRegressor
15 | 
16 | __all__ = ["MondrianTree", "MondrianTreeClassifier", "MondrianTreeRegressor"]
17 | 


--------------------------------------------------------------------------------
/river/tree/mondrian/mondrian_tree.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import abc
 4 | import random
 5 | 
 6 | 
 7 | class MondrianTree(abc.ABC):
 8 |     """Base class for Mondrian Trees.
 9 | 
10 |     This is an **abstract class**, so it cannot be used directly. It defines base operations
11 |     and properties that all the Mondrian Trees must inherit or implement according to
12 |     their own design.
13 | 
14 |     Parameters
15 |     ----------
16 |     step
17 |         Step parameter of the tree.
18 |     loss
19 |         Loss to minimize for each node of the tree. At the moment it is a placeholder.
20 |         In the future, different optimization metrics might become available.
21 |     use_aggregation
22 |         Whether or not the tree should it use aggregation.
23 |     iteration
24 |         Number of iterations to run when training.
25 |     seed
26 |         Random seed for reproducibility.
27 | 
28 |     """
29 | 
30 |     def __init__(
31 |         self,
32 |         step: float = 0.1,
33 |         loss: str = "log",
34 |         use_aggregation: bool = True,
35 |         iteration: int = 0,
36 |         seed: int | None = None,
37 |     ):
38 |         # Properties common to all the Mondrian Trees
39 |         self.step = step
40 |         self.loss = loss
41 |         self.use_aggregation = use_aggregation
42 |         self.iteration = iteration
43 | 
44 |         # Controls the randomness in the tree
45 |         self.seed = seed
46 |         self._rng = random.Random(seed)
47 | 


--------------------------------------------------------------------------------
/river/tree/nodes/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The `river.tree.nodes` module includes learning and split node
3 | implementations for the hoeffding trees.
4 | """
5 | 


--------------------------------------------------------------------------------
/river/tree/setup.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from numpy.distutils.misc_util import Configuration
 4 | 
 5 | 
 6 | def configuration(parent_package="", top_path=None):
 7 |     config = Configuration("tree", parent_package, top_path)
 8 | 
 9 |     # submodules which do not have their own setup.py
10 |     config.add_subpackage("splitter")
11 |     config.add_subpackage("nodes")
12 |     config.add_subpackage("split_criterion")
13 | 
14 |     return config
15 | 
16 | 
17 | if __name__ == "__main__":
18 |     from numpy.distutils.core import setup
19 | 
20 |     setup(**configuration().todict())
21 | 


--------------------------------------------------------------------------------
/river/tree/split_criterion/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from .gini_split_criterion import GiniSplitCriterion
 4 | from .hellinger_distance_criterion import HellingerDistanceCriterion
 5 | from .info_gain_split_criterion import InfoGainSplitCriterion
 6 | from .intra_cluster_variance_reduction_split_criterion import (
 7 |     IntraClusterVarianceReductionSplitCriterion,
 8 | )
 9 | from .variance_ratio_split_criterion import VarianceRatioSplitCriterion
10 | from .variance_reduction_split_criterion import VarianceReductionSplitCriterion
11 | 
12 | __all__ = [
13 |     "GiniSplitCriterion",
14 |     "HellingerDistanceCriterion",
15 |     "InfoGainSplitCriterion",
16 |     "IntraClusterVarianceReductionSplitCriterion",
17 |     "VarianceRatioSplitCriterion",
18 |     "VarianceReductionSplitCriterion",
19 | ]
20 | 


--------------------------------------------------------------------------------
/river/tree/split_criterion/base.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import abc
 4 | 
 5 | 
 6 | class SplitCriterion(abc.ABC):
 7 |     """SplitCriterion
 8 | 
 9 |     Abstract class for computing splitting criteria with respect to distributions of class values.
10 |     The split criterion is used as a parameter on decision trees and decision stumps.
11 | 
12 |     This class should not me instantiated, as none of its methods are implemented.
13 | 
14 |     """
15 | 
16 |     def __init__(self):
17 |         super().__init__()
18 | 
19 |     @abc.abstractmethod
20 |     def merit_of_split(self, pre_split_dist, post_split_dist):
21 |         """Compute the merit of splitting for a given distribution before the split and after it.
22 | 
23 |         Parameters
24 |         ----------
25 |         pre_split_dist
26 |             The target statistics before the split.
27 |         post_split_dist
28 |             the target statistics after the split.
29 | 
30 |         Returns
31 |         -------
32 |         Value of the merit of splitting
33 |         """
34 | 
35 |     @abc.abstractmethod
36 |     def current_merit(self, dist):
37 |         """Compute the merit of the distribution.
38 | 
39 |         Parameters
40 |         ----------
41 |         dist
42 |             The data distribution.
43 | 
44 |         Returns
45 |         -------
46 |         Value of merit of the distribution according to the splitting criterion
47 |         """
48 | 
49 |     @staticmethod
50 |     @abc.abstractmethod
51 |     def range_of_merit(pre_split_dist):
52 |         """Compute the range of splitting merit.
53 | 
54 |         Parameters
55 |         ----------
56 |         pre_split_dist
57 |             The target statistics before the split.
58 | 
59 |         Returns
60 |         -------
61 |         Value of the range of splitting merit
62 |         """
63 | 


--------------------------------------------------------------------------------
/river/tree/split_criterion/intra_cluster_variance_reduction_split_criterion.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from .variance_reduction_split_criterion import VarianceReductionSplitCriterion
 4 | 
 5 | 
 6 | # This class extends VarianceReductionSplitCriterion since it just computes
 7 | # the variance differently than its ancestor (considering multiple targets)
 8 | class IntraClusterVarianceReductionSplitCriterion(VarianceReductionSplitCriterion):
 9 |     def __init__(self, min_samples_split: int = 5):
10 |         super().__init__(min_samples_split)
11 | 
12 |     def merit_of_split(self, pre_split_dist, post_split_dist):
13 |         icvr = 0.0
14 |         n = list(pre_split_dist.values())[0].mean.n
15 | 
16 |         count = 0
17 | 
18 |         for dist in post_split_dist:
19 |             n_i = list(dist.values())[0].mean.n
20 |             if n_i >= self.min_samples_split:
21 |                 count += 1
22 | 
23 |         if count == len(post_split_dist):
24 |             icvr = self.compute_var(pre_split_dist)
25 |             for dist in post_split_dist:
26 |                 n_i = list(dist.values())[0].mean.n
27 |                 icvr -= n_i / n * self.compute_var(dist)
28 |         return icvr
29 | 
30 |     def current_merit(self, dist):
31 |         return self.compute_var(dist)
32 | 
33 |     @staticmethod
34 |     def compute_var(dist):
35 |         icvr = [vr.get() for vr in dist.values()]
36 |         n = len(icvr)
37 |         return sum(icvr) / n if n > 0 else 0.0
38 | 


--------------------------------------------------------------------------------
/river/tree/splitter/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module implements the Attribute Observers (AO) (or tree splitters) that are used by the
 3 | Hoeffding Trees (HT). It also implements the feature quantizers (FQ) used by Stochastic Gradient
 4 | Trees (SGT). AOs are a core aspect of the HTs construction, and might represent one of the major
 5 | bottlenecks when building the trees. The same holds for SGTs and FQs. The correct choice and setup
 6 | of a splitter might result in significant differences in the running time and memory usage of the
 7 | incremental decision trees.
 8 | 
 9 | AOs for classification and regression trees can be differentiated by using the property
10 | `is_target_class` (`True` for splitters designed to classification tasks). An error will be raised
11 | if one tries to use a classification splitter in a regression tree and vice-versa.
12 | Lastly, AOs cannot be used in SGT and FQs cannot be used in Hoeffding Trees. So, care must be taken
13 | when choosing the correct feature splitter.
14 | 
15 | """
16 | 
17 | from __future__ import annotations
18 | 
19 | from .base import Quantizer, Splitter
20 | from .ebst_splitter import EBSTSplitter
21 | from .exhaustive_splitter import ExhaustiveSplitter
22 | from .gaussian_splitter import GaussianSplitter
23 | from .histogram_splitter import HistogramSplitter
24 | from .qo_splitter import QOSplitter
25 | from .sgt_quantizer import DynamicQuantizer, StaticQuantizer
26 | from .tebst_splitter import TEBSTSplitter
27 | 
28 | __all__ = [
29 |     "DynamicQuantizer",
30 |     "EBSTSplitter",
31 |     "ExhaustiveSplitter",
32 |     "GaussianSplitter",
33 |     "HistogramSplitter",
34 |     "QOSplitter",
35 |     "Quantizer",
36 |     "Splitter",
37 |     "StaticQuantizer",
38 |     "TEBSTSplitter",
39 | ]
40 | 


--------------------------------------------------------------------------------
/river/tree/splitter/tebst_splitter.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from .ebst_splitter import EBSTSplitter
 4 | 
 5 | 
 6 | class TEBSTSplitter(EBSTSplitter):
 7 |     """Truncated E-BST.
 8 | 
 9 |     Variation of E-BST that rounds the incoming feature values before passing them to the binary
10 |     search tree (BST). By doing so, the attribute observer might reduce its processing time and
11 |     memory usage since small variations in the input values will end up being mapped to the same
12 |     BST node.
13 | 
14 |     Parameters
15 |     ----------
16 |     digits
17 |         The number of decimal places used to round the input feature values.
18 | 
19 |     """
20 | 
21 |     def __init__(self, digits: int = 1):
22 |         super().__init__()
23 |         self.digits = digits
24 | 
25 |     def update(self, att_val, target_val, w):
26 |         try:
27 |             att_val = round(att_val, self.digits)
28 |             super().update(att_val, target_val, w)
29 |         except TypeError:  # feature value is None
30 |             pass
31 | 
32 |     def cond_proba(self, att_val, target_val):
33 |         """Not implemented in regression splitters."""
34 |         raise NotImplementedError
35 | 


--------------------------------------------------------------------------------
/river/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | """Shared utility classes and functions"""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from . import inspect, math, norm, pretty, random
 6 | from .context_managers import log_method_calls
 7 | from .param_grid import expand_param_grid
 8 | from .rolling import Rolling, TimeRolling
 9 | from .sorted_window import SortedWindow
10 | from .vectordict import VectorDict
11 | 
12 | __all__ = [
13 |     "expand_param_grid",
14 |     "inspect",
15 |     "log_method_calls",
16 |     "math",
17 |     "pretty",
18 |     "random",
19 |     "norm",
20 |     "Rolling",
21 |     "SortedWindow",
22 |     "VectorDict",
23 |     "TimeRolling",
24 | ]
25 | 


--------------------------------------------------------------------------------
/river/utils/context_managers.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | from river.base.base import log_method_calls
4 | 
5 | __all__ = ["log_method_calls"]
6 | 


--------------------------------------------------------------------------------
/river/utils/random.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import math
 4 | import random
 5 | 
 6 | __all__ = ["poisson", "exponential"]
 7 | 
 8 | 
 9 | def poisson(rate: float, rng=random) -> int:
10 |     """Sample a random value from a Poisson distribution.
11 | 
12 |     Parameters
13 |     ----------
14 |     rate
15 |     rng
16 | 
17 |     References
18 |     ----------
19 |     [^1] [Wikipedia article](https://www.wikiwand.com/en/Poisson_distribution#/Generating_Poisson-distributed_random_variables)
20 | 
21 |     """
22 | 
23 |     L = math.exp(-rate)
24 |     k = 0
25 |     p = 1
26 | 
27 |     while p > L:
28 |         k += 1
29 |         p *= rng.random()
30 | 
31 |     return k - 1
32 | 
33 | 
34 | def exponential(rate: float = 1.0, rng=random) -> float:
35 |     """Sample a random value from a Poisson distribution.
36 | 
37 |     Parameters
38 |     ----------
39 |     rate
40 |     rng
41 | 
42 |     References
43 |     ----------
44 |     [^1]: [Wikipedia article](https://www.wikiwand.com/en/Exponential_distribution#Random_variate_generation)
45 | 
46 |     """
47 | 
48 |     u = rng.random()
49 | 
50 |     # Retrieve the λ value from the rate (β): β = 1 / λ
51 |     lmbda = 1.0 / rate
52 |     return -math.log(1 - u) / lmbda
53 | 


--------------------------------------------------------------------------------
/river/utils/sorted_window.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import bisect
 4 | import collections
 5 | 
 6 | 
 7 | class SortedWindow(collections.UserList):
 8 |     """Sorted running window data structure.
 9 | 
10 |     Parameters
11 |     ----------
12 |     size
13 |         Size of the window to compute the rolling quantile.
14 | 
15 |     Examples
16 |     --------
17 | 
18 |     >>> from river import utils
19 | 
20 |     >>> window = utils.SortedWindow(size=3)
21 | 
22 |     >>> for i in reversed(range(9)):
23 |     ...     window.append(i)
24 |     ...     print(window)
25 |     [8]
26 |     [7, 8]
27 |     [6, 7, 8]
28 |     [5, 6, 7]
29 |     [4, 5, 6]
30 |     [3, 4, 5]
31 |     [2, 3, 4]
32 |     [1, 2, 3]
33 |     [0, 1, 2]
34 | 
35 |     References
36 |     ----------
37 |     [^1]: [Left sorted inserts in Python](https://stackoverflow.com/questions/8024571/insert-an-item-into-sorted-list-in-python)
38 | 
39 |     """
40 | 
41 |     def __init__(self, size: int):
42 |         super().__init__()
43 |         self.unsorted_window: collections.deque = collections.deque(maxlen=size)
44 | 
45 |     @property
46 |     def size(self):
47 |         return self.unsorted_window.maxlen
48 | 
49 |     def append(self, x) -> None:
50 |         if len(self) >= self.size:
51 |             # The window is sorted, and a binary search is more optimized than linear search
52 |             start_deque = bisect.bisect_left(self, self.unsorted_window[0])
53 |             del self[start_deque]
54 | 
55 |         bisect.insort_left(self, x)
56 |         self.unsorted_window.append(x)
57 | 


--------------------------------------------------------------------------------
/river/utils/test_rolling.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import datetime as dt
 4 | 
 5 | import pytest
 6 | 
 7 | from river import proba, stats, utils
 8 | 
 9 | 
10 | def test_with_counter():
11 |     """
12 |     >>> from river import utils
13 |     >>> import collections
14 |     >>> collections.Counter.revert = collections.Counter.subtract
15 | 
16 |     >>> counter = utils.Rolling(collections.Counter(), window_size=3)
17 | 
18 |     >>> for i in range(5):
19 |     ...     counter.update([i])
20 | 
21 |     >>> counter
22 |     Counter({2: 1, 3: 1, 4: 1, 0: 0, 1: 0})
23 | 
24 |     >>> counter.most_common(3)
25 |     [(2, 1), (3, 1), (4, 1)]
26 | 
27 |     >>> counter[4]
28 |     1
29 | 
30 |     """
31 | 
32 | 
33 | def test_rolling_with_not_rollable():
34 |     with pytest.raises(ValueError):
35 |         utils.Rolling(stats.Quantile(), window_size=10)
36 | 
37 | 
38 | def test_time_rolling_with_not_rollable():
39 |     with pytest.raises(ValueError):
40 |         utils.TimeRolling(stats.Quantile(), period=dt.timedelta(seconds=10))
41 | 
42 | 
43 | def test_issue_1343():
44 |     """
45 | 
46 |     https://github.com/online-ml/river/issues/1343
47 | 
48 |     """
49 |     rmean = utils.TimeRolling(proba.MultivariateGaussian(), period=dt.timedelta(microseconds=1))
50 |     t = dt.datetime.now()
51 |     rmean.update({"a": 0}, t=t)
52 |     rmean.update({"a": 1}, t=t)
53 | 


--------------------------------------------------------------------------------