├── sklearn ├── _loss │ ├── __init__.py │ └── tests │ │ └── __init__.py ├── tests │ ├── __init__.py │ ├── test_check_build.py │ ├── test_init.py │ └── test_build.py ├── cluster │ ├── tests │ │ ├── __init__.py │ │ └── common.py │ └── _k_means_common.pxd ├── compose │ ├── tests │ │ └── __init__.py │ └── __init__.py ├── datasets │ ├── data │ │ ├── __init__.py │ │ ├── digits.csv.gz │ │ ├── diabetes_data.csv.gz │ │ ├── diabetes_target.csv.gz │ │ ├── linnerud_exercise.csv │ │ └── linnerud_physiological.csv │ ├── descr │ │ ├── __init__.py │ │ ├── linnerud.rst │ │ └── covtype.rst │ ├── images │ │ ├── __init__.py │ │ ├── china.jpg │ │ ├── flower.jpg │ │ └── README.txt │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── openml │ │ │ │ ├── __init__.py │ │ │ │ ├── id_1 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-1.json.gz │ │ │ │ │ ├── api-v1-jdf-1.json.gz │ │ │ │ │ ├── api-v1-jdq-1.json.gz │ │ │ │ │ └── data-v1-dl-1.arff.gz │ │ │ │ ├── id_2 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-2.json.gz │ │ │ │ │ ├── api-v1-jdf-2.json.gz │ │ │ │ │ ├── api-v1-jdq-2.json.gz │ │ │ │ │ ├── data-v1-dl-1666876.arff.gz │ │ │ │ │ ├── api-v1-jdl-dn-anneal-l-2-dv-1.json.gz │ │ │ │ │ └── api-v1-jdl-dn-anneal-l-2-s-act-.json.gz │ │ │ │ ├── id_292 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-292.json.gz │ │ │ │ │ ├── api-v1-jd-40981.json.gz │ │ │ │ │ ├── api-v1-jdf-292.json.gz │ │ │ │ │ ├── api-v1-jdf-40981.json.gz │ │ │ │ │ ├── data-v1-dl-49822.arff.gz │ │ │ │ │ ├── api-v1-jdl-dn-australian-l-2-dv-1.json.gz │ │ │ │ │ ├── api-v1-jdl-dn-australian-l-2-s-act-.json.gz │ │ │ │ │ └── api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz │ │ │ │ ├── id_3 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-3.json.gz │ │ │ │ │ ├── api-v1-jdf-3.json.gz │ │ │ │ │ ├── api-v1-jdq-3.json.gz │ │ │ │ │ └── data-v1-dl-3.arff.gz │ │ │ │ ├── id_561 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-561.json.gz │ │ │ │ │ ├── api-v1-jdf-561.json.gz │ │ │ │ │ ├── api-v1-jdq-561.json.gz 
│ │ │ │ │ ├── data-v1-dl-52739.arff.gz │ │ │ │ │ ├── api-v1-jdl-dn-cpu-l-2-dv-1.json.gz │ │ │ │ │ └── api-v1-jdl-dn-cpu-l-2-s-act-.json.gz │ │ │ │ ├── id_61 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-61.json.gz │ │ │ │ │ ├── api-v1-jdf-61.json.gz │ │ │ │ │ ├── api-v1-jdq-61.json.gz │ │ │ │ │ ├── data-v1-dl-61.arff.gz │ │ │ │ │ ├── api-v1-jdl-dn-iris-l-2-dv-1.json.gz │ │ │ │ │ └── api-v1-jdl-dn-iris-l-2-s-act-.json.gz │ │ │ │ ├── id_62 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-62.json.gz │ │ │ │ │ ├── api-v1-jdf-62.json.gz │ │ │ │ │ ├── api-v1-jdq-62.json.gz │ │ │ │ │ └── data-v1-dl-52352.arff.gz │ │ │ │ ├── id_1119 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-1119.json.gz │ │ │ │ │ ├── api-v1-jdf-1119.json.gz │ │ │ │ │ ├── api-v1-jdq-1119.json.gz │ │ │ │ │ ├── data-v1-dl-54002.arff.gz │ │ │ │ │ ├── api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz │ │ │ │ │ └── api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz │ │ │ │ ├── id_40589 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-40589.json.gz │ │ │ │ │ ├── api-v1-jdf-40589.json.gz │ │ │ │ │ ├── api-v1-jdq-40589.json.gz │ │ │ │ │ ├── data-v1-dl-4644182.arff.gz │ │ │ │ │ ├── api-v1-jdl-dn-emotions-l-2-dv-3.json.gz │ │ │ │ │ └── api-v1-jdl-dn-emotions-l-2-s-act-.json.gz │ │ │ │ ├── id_40675 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-40675.json.gz │ │ │ │ │ ├── api-v1-jdf-40675.json.gz │ │ │ │ │ ├── api-v1-jdq-40675.json.gz │ │ │ │ │ ├── data-v1-dl-4965250.arff.gz │ │ │ │ │ ├── api-v1-jdl-dn-glass2-l-2-dv-1.json.gz │ │ │ │ │ ├── api-v1-jdl-dn-glass2-l-2-s-act-.json.gz │ │ │ │ │ └── api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz │ │ │ │ ├── id_40945 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-40945.json.gz │ │ │ │ │ ├── api-v1-jdf-40945.json.gz │ │ │ │ │ ├── api-v1-jdq-40945.json.gz │ │ │ │ │ └── data-v1-dl-16826755.arff.gz │ │ │ │ ├── id_40966 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-40966.json.gz │ │ │ │ │ ├── api-v1-jdf-40966.json.gz │ │ │ │ │ ├── api-v1-jdq-40966.json.gz │ │ │ │ │ ├── 
data-v1-dl-17928620.arff.gz │ │ │ │ │ ├── api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz │ │ │ │ │ └── api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz │ │ │ │ └── id_42585 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── api-v1-jd-42585.json.gz │ │ │ │ │ ├── api-v1-jdf-42585.json.gz │ │ │ │ │ ├── api-v1-jdq-42585.json.gz │ │ │ │ │ └── data-v1-dl-21854866.arff.gz │ │ │ ├── svmlight_invalid_order.txt │ │ │ ├── svmlight_invalid.txt │ │ │ ├── svmlight_multilabel.txt │ │ │ └── svmlight_classification.txt │ │ ├── conftest.py │ │ └── test_olivetti_faces.py │ └── setup.py ├── impute │ ├── tests │ │ └── __init__.py │ └── __init__.py ├── metrics │ ├── _plot │ │ ├── __init__.py │ │ └── tests │ │ │ └── __init__.py │ ├── tests │ │ └── __init__.py │ ├── cluster │ │ ├── tests │ │ │ └── __init__.py │ │ └── setup.py │ └── setup.py ├── mixture │ ├── tests │ │ ├── __init__.py │ │ └── test_mixture.py │ └── __init__.py ├── svm │ ├── tests │ │ └── __init__.py │ ├── src │ │ ├── libsvm │ │ │ ├── libsvm_template.cpp │ │ │ ├── _svm_cython_blas_helpers.h │ │ │ └── LIBSVM_CHANGES │ │ └── liblinear │ │ │ ├── _cython_blas_helpers.h │ │ │ └── tron.h │ ├── _newrand.pyx │ └── __init__.py ├── tree │ ├── tests │ │ └── __init__.py │ ├── __init__.py │ └── setup.py ├── utils │ ├── tests │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_arpack.py │ │ ├── test_weight_vector.py │ │ ├── test_arrayfuncs.py │ │ ├── test_optimize.py │ │ ├── test_cython_templating.py │ │ ├── test_fast_dict.py │ │ ├── test_parallel.py │ │ └── test_show_versions.py │ ├── _typedefs.pxd │ ├── _fast_dict.pxd │ ├── _joblib.py │ ├── _logistic_sigmoid.pyx │ ├── _typedefs.pyx │ ├── murmurhash.pxd │ ├── _arpack.py │ └── src │ │ └── MurmurHash3.h ├── covariance │ ├── tests │ │ └── __init__.py │ └── __init__.py ├── ensemble │ ├── tests │ │ └── __init__.py │ └── _hist_gradient_boosting │ │ ├── tests │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── _bitset.pxd │ │ ├── common.pyx │ │ └── common.pxd ├── experimental │ ├── tests │ │ ├── __init__.py │ │ └── 
test_enable_hist_gradient_boosting.py │ ├── __init__.py │ ├── enable_iterative_imputer.py │ ├── enable_hist_gradient_boosting.py │ └── enable_halving_search_cv.py ├── inspection │ ├── _plot │ │ ├── __init__.py │ │ └── tests │ │ │ └── __init__.py │ ├── tests │ │ └── __init__.py │ ├── setup.py │ └── __init__.py ├── linear_model │ ├── tests │ │ └── __init__.py │ ├── _glm │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_link.py │ │ └── __init__.py │ ├── _sgd_fast_helpers.h │ ├── _sgd_fast.pxd │ └── setup.py ├── manifold │ ├── tests │ │ └── __init__.py │ ├── __init__.py │ └── setup.py ├── neighbors │ ├── tests │ │ ├── __init__.py │ │ └── test_kd_tree.py │ ├── _partition_nodes.pxd │ ├── _distance_metric.py │ ├── setup.py │ └── __init__.py ├── decomposition │ ├── tests │ │ └── __init__.py │ ├── setup.py │ ├── _cdnmf_fast.pyx │ └── __init__.py ├── externals │ ├── _packaging │ │ └── __init__.py │ ├── __init__.py │ ├── README │ └── conftest.py ├── feature_selection │ └── tests │ │ └── __init__.py ├── gaussian_process │ ├── tests │ │ └── __init__.py │ └── __init__.py ├── model_selection │ └── tests │ │ ├── __init__.py │ │ └── common.py ├── neural_network │ ├── tests │ │ ├── __init__.py │ │ └── test_base.py │ └── __init__.py ├── preprocessing │ ├── tests │ │ └── __init__.py │ └── setup.py ├── semi_supervised │ ├── tests │ │ └── __init__.py │ └── __init__.py ├── cross_decomposition │ ├── tests │ │ └── __init__.py │ └── __init__.py ├── feature_extraction │ ├── tests │ │ └── __init__.py │ ├── __init__.py │ └── setup.py ├── __check_build │ ├── _check_build.pyx │ └── setup.py └── _distributor_init.py ├── doc ├── testimonials │ ├── images │ │ ├── Makefile │ │ ├── inria.png │ │ ├── mars.png │ │ ├── yhat.png │ │ ├── zopa.png │ │ ├── aweber.png │ │ ├── booking.png │ │ ├── infonea.jpg │ │ ├── lovely.png │ │ ├── okcupid.png │ │ ├── phimeca.png │ │ ├── spotify.png │ │ ├── betaworks.png │ │ ├── birchbox.jpg │ │ ├── datarobot.png │ │ ├── evernote.png │ │ ├── howaboutwe.png │ │ ├── 
jpmorgan.png │ │ ├── machinalis.png │ │ ├── peerindex.png │ │ ├── rangespan.png │ │ ├── change-logo.png │ │ ├── dataiku_logo.png │ │ ├── datapublica.png │ │ ├── huggingface.png │ │ ├── solido_logo.png │ │ ├── ottogroup_logo.png │ │ ├── bestofmedia-logo.png │ │ ├── telecomparistech.jpg │ │ └── bnp_paribas_cardif.png │ └── README.txt ├── sphinxext │ └── MANIFEST.in ├── images │ ├── axa.png │ ├── bcg.png │ ├── bnp.png │ ├── dysco.png │ ├── intel.png │ ├── iris.pdf │ ├── anaconda.png │ ├── cds-logo.png │ ├── columbia.png │ ├── dataiku.png │ ├── fujitsu.png │ ├── ml_map.png │ ├── no_image.png │ ├── nvidia.png │ ├── telecom.png │ ├── axa-small.png │ ├── bcg-small.png │ ├── bnp-small.png │ ├── digicosme.png │ ├── inria-logo.jpg │ ├── last_digit.png │ ├── logo_APHP.png │ ├── microsoft.png │ ├── rbm_graph.png │ ├── dataiku-small.png │ ├── fujitsu-small.png │ ├── google-small.png │ ├── inria-small.png │ ├── intel-small.png │ ├── nvidia-small.png │ ├── sloan_banner.png │ ├── telecom-small.png │ ├── zalando_logo.png │ ├── columbia-small.png │ ├── fnrs-logo-small.png │ ├── lda_model_graph.png │ ├── logo_APHP_text.png │ ├── microsoft-small.png │ ├── multi_org_chart.png │ ├── nyu_short_color.png │ ├── quansight-labs.png │ ├── sydney-primary.jpeg │ ├── sloan_logo-small.png │ ├── zalando_logo-small.png │ ├── grid_search_workflow.png │ ├── quansight-labs-small.png │ ├── sydney-stacked-small.png │ ├── plot_face_recognition_1.png │ ├── plot_face_recognition_2.png │ ├── scikit-learn-logo-small.png │ ├── scikit-learn-logo-notext.png │ ├── grid_search_cross_validation.png │ ├── multilayerperceptron_network.png │ ├── plot_digits_classification.png │ └── png-logo-inria-la-fondation.png ├── logos │ ├── favicon.ico │ ├── identity.pdf │ ├── scikit-learn-logo.bmp │ ├── scikit-learn-logo.png │ ├── scikit-learn-logo-notext.png │ ├── scikit-learn-logo-small.png │ └── scikit-learn-logo-thumb.png ├── modules │ ├── glm_data │ │ ├── lasso_enet_coordinate_descent.png │ │ └── 
poisson_gamma_tweedie_distributions.png │ ├── pipeline.rst │ └── isotonic.rst ├── themes │ └── scikit-learn-modern │ │ ├── theme.conf │ │ └── search.html ├── tutorial │ ├── common_includes │ │ └── info.txt │ ├── text_analytics │ │ ├── .gitignore │ │ ├── data │ │ │ ├── movie_reviews │ │ │ │ └── fetch_data.py │ │ │ └── twenty_newsgroups │ │ │ │ └── fetch_data.py │ │ └── solutions │ │ │ └── generate_skeletons.py │ └── index.rst ├── templates │ ├── generate_deprecated.sh │ ├── class.rst │ ├── function.rst │ ├── numpydoc_docstring.rst │ ├── class_with_call.rst │ ├── deprecated_function.rst │ ├── deprecated_class_without_init.rst │ ├── deprecated_class.rst │ ├── deprecated_class_with_call.rst │ └── redirects.html ├── README.md ├── binder │ └── requirements.txt ├── computing.rst ├── model_selection.rst ├── developers │ └── index.rst ├── contents.rst ├── unsupervised_learning.rst ├── communication_team.rst ├── whats_new │ └── changelog_legend.inc ├── user_guide.rst ├── preface.rst ├── authors_emeritus.rst ├── supervised_learning.rst ├── includes │ ├── big_toc_css.rst │ └── bigger_toc_css.rst ├── whats_new.rst ├── datasets │ ├── real_world.rst │ └── toy_dataset.rst └── inspection.rst ├── .circleci └── artifact_path ├── .gitattributes ├── benchmarks ├── .gitignore ├── plot_tsne_mnist.py └── bench_plot_ward.py ├── examples ├── README.txt ├── exercises │ ├── README.txt │ ├── plot_digits_classification_exercise.py │ └── plot_cv_digits.py ├── cluster │ ├── README.txt │ └── plot_kmeans_plusplus.py ├── tree │ └── README.txt ├── ensemble │ └── README.txt ├── inspection │ └── README.txt ├── svm │ ├── README.txt │ └── plot_svm_nonlinear.py ├── bicluster │ └── README.txt ├── datasets │ ├── README.txt │ └── plot_digits_last_image.py ├── classification │ └── README.txt ├── impute │ └── README.txt ├── miscellaneous │ ├── README.txt │ └── plot_changed_only_pprint_parameter.py ├── mixture │ └── README.txt ├── neighbors │ └── README.txt ├── preprocessing │ └── README.txt ├── covariance │ 
└── README.txt ├── decomposition │ ├── README.txt │ └── plot_beta_divergence.py ├── manifold │ └── README.txt ├── multioutput │ └── README.txt ├── linear_model │ ├── README.txt │ ├── plot_lasso_lars.py │ ├── plot_sgd_separating_hyperplane.py │ └── plot_sgd_loss_functions.py ├── neural_networks │ └── README.txt ├── model_selection │ ├── README.txt │ └── plot_cv_predict.py ├── text │ └── README.txt ├── calibration │ └── README.txt ├── feature_selection │ ├── README.txt │ └── plot_rfe_digits.py ├── cross_decomposition │ └── README.txt ├── kernel_approximation │ └── README.txt ├── release_highlights │ └── README.txt ├── semi_supervised │ └── README.txt ├── gaussian_process │ └── README.txt ├── applications │ └── README.txt └── compose │ └── README.txt ├── asv_benchmarks ├── benchmarks │ ├── __init__.py │ ├── svm.py │ ├── manifold.py │ └── neighbors.py └── .gitignore ├── .binder └── requirements.txt ├── .coveragerc ├── .github ├── labeler-file-extensions.yml ├── workflows │ ├── check-manifest.yml │ ├── unassign.yml │ ├── labeler-title-regex.yml │ ├── labeler-module.yml │ ├── assign.yml │ └── twitter.yml ├── ISSUE_TEMPLATE │ ├── doc_improvement.yml │ ├── feature_request.yml │ └── config.yml ├── FUNDING.yml ├── scripts │ └── label_title_regex.py └── PULL_REQUEST_TEMPLATE.md ├── lgtm.yml ├── conftest.py ├── SECURITY.md ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── .git-blame-ignore-revs ├── .codecov.yml ├── MANIFEST.in ├── pyproject.toml ├── maint_tools └── sort_whats_new.py └── .gitignore /sklearn/_loss/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/testimonials/images/Makefile: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/_loss/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/cluster/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/compose/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/impute/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/metrics/_plot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/metrics/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/mixture/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/svm/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/sklearn/tree/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/utils/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/covariance/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/images/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/ensemble/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/experimental/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/inspection/_plot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/inspection/tests/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /sklearn/linear_model/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/manifold/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/neighbors/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/decomposition/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/externals/_packaging/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/feature_selection/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/gaussian_process/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/inspection/_plot/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/metrics/_plot/tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/metrics/cluster/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/model_selection/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/neural_network/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/preprocessing/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/semi_supervised/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.circleci/artifact_path: -------------------------------------------------------------------------------- 1 | 0/doc/_changed.html 2 | -------------------------------------------------------------------------------- /sklearn/cross_decomposition/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/tests/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | /doc/whats_new/v*.rst merge=union 2 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_62/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40945/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_42585/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn/linear_model/_glm/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # License: BSD 3 clause 2 | -------------------------------------------------------------------------------- /benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | /bhtsne 2 | *.npy 3 | *.json 4 | /mnist_tsne_output/ 5 | -------------------------------------------------------------------------------- /doc/sphinxext/MANIFEST.in: 
-------------------------------------------------------------------------------- 1 | recursive-include tests *.py 2 | include *.txt 3 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _general_examples: 2 | 3 | Examples 4 | ======== 5 | -------------------------------------------------------------------------------- /sklearn/__check_build/_check_build.pyx: -------------------------------------------------------------------------------- 1 | def check_build(): 2 | return 3 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_invalid_order.txt: -------------------------------------------------------------------------------- 1 | -1 5:2.5 2:-5.2 15:1.5 2 | -------------------------------------------------------------------------------- /asv_benchmarks/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | """Benchmark suite for scikit-learn using ASV""" 2 | -------------------------------------------------------------------------------- /doc/images/axa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/axa.png -------------------------------------------------------------------------------- /doc/images/bcg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/bcg.png -------------------------------------------------------------------------------- /doc/images/bnp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/bnp.png -------------------------------------------------------------------------------- 
/sklearn/externals/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | External, bundled dependencies. 4 | 5 | """ 6 | -------------------------------------------------------------------------------- /doc/images/dysco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/dysco.png -------------------------------------------------------------------------------- /doc/images/intel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/intel.png -------------------------------------------------------------------------------- /doc/images/iris.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/iris.pdf -------------------------------------------------------------------------------- /doc/images/anaconda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/anaconda.png -------------------------------------------------------------------------------- /doc/images/cds-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/cds-logo.png -------------------------------------------------------------------------------- /doc/images/columbia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/columbia.png -------------------------------------------------------------------------------- /doc/images/dataiku.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/dataiku.png -------------------------------------------------------------------------------- /doc/images/fujitsu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/fujitsu.png -------------------------------------------------------------------------------- /doc/images/ml_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/ml_map.png -------------------------------------------------------------------------------- /doc/images/no_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/no_image.png -------------------------------------------------------------------------------- /doc/images/nvidia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/nvidia.png -------------------------------------------------------------------------------- /doc/images/telecom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/telecom.png -------------------------------------------------------------------------------- /doc/logos/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/favicon.ico -------------------------------------------------------------------------------- /doc/logos/identity.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/identity.pdf 
-------------------------------------------------------------------------------- /doc/images/axa-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/axa-small.png -------------------------------------------------------------------------------- /doc/images/bcg-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/bcg-small.png -------------------------------------------------------------------------------- /doc/images/bnp-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/bnp-small.png -------------------------------------------------------------------------------- /doc/images/digicosme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/digicosme.png -------------------------------------------------------------------------------- /doc/images/inria-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/inria-logo.jpg -------------------------------------------------------------------------------- /doc/images/last_digit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/last_digit.png -------------------------------------------------------------------------------- /doc/images/logo_APHP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/logo_APHP.png 
-------------------------------------------------------------------------------- /doc/images/microsoft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/microsoft.png -------------------------------------------------------------------------------- /doc/images/rbm_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/rbm_graph.png -------------------------------------------------------------------------------- /doc/images/dataiku-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/dataiku-small.png -------------------------------------------------------------------------------- /doc/images/fujitsu-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/fujitsu-small.png -------------------------------------------------------------------------------- /doc/images/google-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/google-small.png -------------------------------------------------------------------------------- /doc/images/inria-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/inria-small.png -------------------------------------------------------------------------------- /doc/images/intel-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/intel-small.png 
-------------------------------------------------------------------------------- /doc/images/nvidia-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/nvidia-small.png -------------------------------------------------------------------------------- /doc/images/sloan_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/sloan_banner.png -------------------------------------------------------------------------------- /doc/images/telecom-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/telecom-small.png -------------------------------------------------------------------------------- /doc/images/zalando_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/zalando_logo.png -------------------------------------------------------------------------------- /examples/exercises/README.txt: -------------------------------------------------------------------------------- 1 | Tutorial exercises 2 | ------------------ 3 | 4 | Exercises for the tutorials 5 | -------------------------------------------------------------------------------- /asv_benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__* 2 | env/ 3 | html/ 4 | results/ 5 | scikit-learn/ 6 | benchmarks/cache/ 7 | -------------------------------------------------------------------------------- /doc/images/columbia-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/columbia-small.png 
-------------------------------------------------------------------------------- /doc/images/fnrs-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/fnrs-logo-small.png -------------------------------------------------------------------------------- /doc/images/lda_model_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/lda_model_graph.png -------------------------------------------------------------------------------- /doc/images/logo_APHP_text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/logo_APHP_text.png -------------------------------------------------------------------------------- /doc/images/microsoft-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/microsoft-small.png -------------------------------------------------------------------------------- /doc/images/multi_org_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/multi_org_chart.png -------------------------------------------------------------------------------- /doc/images/nyu_short_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/nyu_short_color.png -------------------------------------------------------------------------------- /doc/images/quansight-labs.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/quansight-labs.png -------------------------------------------------------------------------------- /doc/images/sydney-primary.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/sydney-primary.jpeg -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_invalid.txt: -------------------------------------------------------------------------------- 1 | python 2:2.5 10:-5.2 15:1.5 2 | 2.0 5:1.0 12:-3 3 | 3.0 20:27 4 | -------------------------------------------------------------------------------- /doc/images/sloan_logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/sloan_logo-small.png -------------------------------------------------------------------------------- /doc/images/zalando_logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/zalando_logo-small.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/scikit-learn-logo.bmp -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/scikit-learn-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/inria.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/inria.png -------------------------------------------------------------------------------- /doc/testimonials/images/mars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/mars.png -------------------------------------------------------------------------------- /doc/testimonials/images/yhat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/yhat.png -------------------------------------------------------------------------------- /doc/testimonials/images/zopa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/zopa.png -------------------------------------------------------------------------------- /sklearn/datasets/images/china.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/images/china.jpg -------------------------------------------------------------------------------- /doc/images/grid_search_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/grid_search_workflow.png -------------------------------------------------------------------------------- /doc/images/quansight-labs-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/quansight-labs-small.png 
-------------------------------------------------------------------------------- /doc/images/sydney-stacked-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/sydney-stacked-small.png -------------------------------------------------------------------------------- /doc/testimonials/images/aweber.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/aweber.png -------------------------------------------------------------------------------- /doc/testimonials/images/booking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/booking.png -------------------------------------------------------------------------------- /doc/testimonials/images/infonea.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/infonea.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/lovely.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/lovely.png -------------------------------------------------------------------------------- /doc/testimonials/images/okcupid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/okcupid.png -------------------------------------------------------------------------------- /doc/testimonials/images/phimeca.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/phimeca.png -------------------------------------------------------------------------------- /doc/testimonials/images/spotify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/spotify.png -------------------------------------------------------------------------------- /sklearn/datasets/data/digits.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/data/digits.csv.gz -------------------------------------------------------------------------------- /sklearn/datasets/images/flower.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/images/flower.jpg -------------------------------------------------------------------------------- /doc/images/plot_face_recognition_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/plot_face_recognition_1.png -------------------------------------------------------------------------------- /doc/images/plot_face_recognition_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/plot_face_recognition_2.png -------------------------------------------------------------------------------- /doc/images/scikit-learn-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/scikit-learn-logo-small.png -------------------------------------------------------------------------------- 
/doc/logos/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/scikit-learn-logo-small.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/scikit-learn-logo-thumb.png -------------------------------------------------------------------------------- /doc/testimonials/images/betaworks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/betaworks.png -------------------------------------------------------------------------------- /doc/testimonials/images/birchbox.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/birchbox.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/datarobot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/datarobot.png -------------------------------------------------------------------------------- /doc/testimonials/images/evernote.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/evernote.png -------------------------------------------------------------------------------- /doc/testimonials/images/howaboutwe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/howaboutwe.png -------------------------------------------------------------------------------- /doc/testimonials/images/jpmorgan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/jpmorgan.png -------------------------------------------------------------------------------- /doc/testimonials/images/machinalis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/machinalis.png -------------------------------------------------------------------------------- /doc/testimonials/images/peerindex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/peerindex.png -------------------------------------------------------------------------------- /doc/testimonials/images/rangespan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/rangespan.png -------------------------------------------------------------------------------- /doc/images/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- 
/doc/testimonials/images/change-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/change-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/dataiku_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/dataiku_logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/datapublica.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/datapublica.png -------------------------------------------------------------------------------- /doc/testimonials/images/huggingface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/huggingface.png -------------------------------------------------------------------------------- /doc/testimonials/images/solido_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/solido_logo.png -------------------------------------------------------------------------------- /doc/images/grid_search_cross_validation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/grid_search_cross_validation.png -------------------------------------------------------------------------------- /doc/images/multilayerperceptron_network.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/multilayerperceptron_network.png -------------------------------------------------------------------------------- /doc/images/plot_digits_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/plot_digits_classification.png -------------------------------------------------------------------------------- /doc/images/png-logo-inria-la-fondation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/png-logo-inria-la-fondation.png -------------------------------------------------------------------------------- /doc/testimonials/images/ottogroup_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/ottogroup_logo.png -------------------------------------------------------------------------------- /sklearn/datasets/data/diabetes_data.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/data/diabetes_data.csv.gz -------------------------------------------------------------------------------- /doc/testimonials/images/bestofmedia-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/bestofmedia-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/telecomparistech.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/telecomparistech.jpg 
-------------------------------------------------------------------------------- /sklearn/datasets/data/diabetes_target.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/data/diabetes_target.csv.gz -------------------------------------------------------------------------------- /doc/testimonials/images/bnp_paribas_cardif.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/bnp_paribas_cardif.png -------------------------------------------------------------------------------- /examples/cluster/README.txt: -------------------------------------------------------------------------------- 1 | .. _cluster_examples: 2 | 3 | Clustering 4 | ---------- 5 | 6 | Examples concerning the :mod:`sklearn.cluster` module. 7 | -------------------------------------------------------------------------------- /examples/tree/README.txt: -------------------------------------------------------------------------------- 1 | .. _tree_examples: 2 | 3 | Decision Trees 4 | -------------- 5 | 6 | Examples concerning the :mod:`sklearn.tree` module. 7 | -------------------------------------------------------------------------------- /doc/modules/glm_data/lasso_enet_coordinate_descent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/modules/glm_data/lasso_enet_coordinate_descent.png -------------------------------------------------------------------------------- /examples/ensemble/README.txt: -------------------------------------------------------------------------------- 1 | .. _ensemble_examples: 2 | 3 | Ensemble methods 4 | ---------------- 5 | 6 | Examples concerning the :mod:`sklearn.ensemble` module. 
7 | -------------------------------------------------------------------------------- /examples/inspection/README.txt: -------------------------------------------------------------------------------- 1 | .. _inspection_examples: 2 | 3 | Inspection 4 | ---------- 5 | 6 | Examples related to the :mod:`sklearn.inspection` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/svm/README.txt: -------------------------------------------------------------------------------- 1 | .. _svm_examples: 2 | 3 | Support Vector Machines 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.svm` module. 7 | -------------------------------------------------------------------------------- /examples/bicluster/README.txt: -------------------------------------------------------------------------------- 1 | .. _bicluster_examples: 2 | 3 | Biclustering 4 | ------------ 5 | 6 | Examples concerning the :mod:`sklearn.cluster.bicluster` module. 7 | -------------------------------------------------------------------------------- /examples/datasets/README.txt: -------------------------------------------------------------------------------- 1 | .. _dataset_examples: 2 | 3 | Dataset examples 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.datasets` module. 
7 | -------------------------------------------------------------------------------- /sklearn/cross_decomposition/__init__.py: -------------------------------------------------------------------------------- 1 | from ._pls import PLSCanonical, PLSRegression, PLSSVD, CCA 2 | 3 | __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"] 4 | -------------------------------------------------------------------------------- /doc/modules/glm_data/poisson_gamma_tweedie_distributions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png -------------------------------------------------------------------------------- /examples/classification/README.txt: -------------------------------------------------------------------------------- 1 | .. _classification_examples: 2 | 3 | Classification 4 | ----------------------- 5 | 6 | General examples about classification algorithms. 7 | -------------------------------------------------------------------------------- /examples/impute/README.txt: -------------------------------------------------------------------------------- 1 | .. _impute_examples: 2 | 3 | Missing Value Imputation 4 | ------------------------ 5 | 6 | Examples concerning the :mod:`sklearn.impute` module. 7 | -------------------------------------------------------------------------------- /examples/miscellaneous/README.txt: -------------------------------------------------------------------------------- 1 | .. _miscellaneous_examples: 2 | 3 | Miscellaneous 4 | ------------- 5 | 6 | Miscellaneous and introductory examples for scikit-learn. 7 | 8 | -------------------------------------------------------------------------------- /examples/mixture/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_mixture_examples: 2 | 3 | Gaussian Mixture Models 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.mixture` module. 7 | -------------------------------------------------------------------------------- /examples/neighbors/README.txt: -------------------------------------------------------------------------------- 1 | .. _neighbors_examples: 2 | 3 | Nearest Neighbors 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.neighbors` module. 7 | -------------------------------------------------------------------------------- /examples/preprocessing/README.txt: -------------------------------------------------------------------------------- 1 | .. _preprocessing_examples: 2 | 3 | Preprocessing 4 | ------------- 5 | 6 | Examples concerning the :mod:`sklearn.preprocessing` module. 7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1/api-v1-jd-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1/api-v1-jd-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1/api-v1-jdf-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1/api-v1-jdf-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1/api-v1-jdq-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1/api-v1-jdq-1.json.gz -------------------------------------------------------------------------------- 
/sklearn/datasets/tests/data/openml/id_1/data-v1-dl-1.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1/data-v1-dl-1.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/api-v1-jd-2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/api-v1-jd-2.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/api-v1-jdf-2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/api-v1-jdf-2.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/api-v1-jdq-2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/api-v1-jdq-2.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_3/api-v1-jd-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_3/api-v1-jd-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_3/api-v1-jdf-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_3/api-v1-jdf-3.json.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_3/api-v1-jdq-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_3/api-v1-jdq-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_3/data-v1-dl-3.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_3/data-v1-dl-3.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_multilabel.txt: -------------------------------------------------------------------------------- 1 | # multilabel dataset in SVMlight format 2 | 1,0 2:2.5 10:-5.2 15:1.5 3 | 2 5:1.0 12:-3 4 | 2:3.5 11:26 5 | 1,2 20:27 6 | -------------------------------------------------------------------------------- /examples/covariance/README.txt: -------------------------------------------------------------------------------- 1 | .. _covariance_examples: 2 | 3 | Covariance estimation 4 | --------------------- 5 | 6 | Examples concerning the :mod:`sklearn.covariance` module. 7 | -------------------------------------------------------------------------------- /examples/decomposition/README.txt: -------------------------------------------------------------------------------- 1 | .. _decomposition_examples: 2 | 3 | Decomposition 4 | ------------- 5 | 6 | Examples concerning the :mod:`sklearn.decomposition` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/manifold/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_manifold_examples: 2 | 3 | Manifold learning 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.manifold` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/multioutput/README.txt: -------------------------------------------------------------------------------- 1 | .. _multioutput_examples: 2 | 3 | Multioutput methods 4 | ------------------- 5 | 6 | Examples concerning the :mod:`sklearn.multioutput` module. 7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jd-292.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-292.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/api-v1-jd-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/api-v1-jd-561.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/api-v1-jd-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/api-v1-jd-61.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/api-v1-jdf-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/api-v1-jdf-61.json.gz -------------------------------------------------------------------------------- 
/sklearn/datasets/tests/data/openml/id_61/api-v1-jdq-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/api-v1-jdq-61.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/data-v1-dl-61.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/data-v1-dl-61.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_62/api-v1-jd-62.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_62/api-v1-jd-62.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_62/api-v1-jdf-62.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_62/api-v1-jdf-62.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_62/api-v1-jdq-62.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_62/api-v1-jdq-62.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/api-v1-jd-1119.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/api-v1-jd-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jd-40981.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-40981.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-292.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-292.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/api-v1-jdf-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/api-v1-jdf-561.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/api-v1-jdq-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/api-v1-jdq-561.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_62/data-v1-dl-52352.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_62/data-v1-dl-52352.arff.gz -------------------------------------------------------------------------------- 
/examples/linear_model/README.txt: -------------------------------------------------------------------------------- 1 | .. _linear_examples: 2 | 3 | Generalized Linear Models 4 | ------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.linear_model` module. 7 | -------------------------------------------------------------------------------- /examples/neural_networks/README.txt: -------------------------------------------------------------------------------- 1 | .. _neural_network_examples: 2 | 3 | Neural Networks 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.neural_network` module. 7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/api-v1-jdf-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdf-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/api-v1-jdq-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdq-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/data-v1-dl-54002.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/data-v1-dl-54002.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/data-v1-dl-1666876.arff.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/data-v1-dl-1666876.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-40981.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-40981.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/data-v1-dl-49822.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/data-v1-dl-49822.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/api-v1-jd-40589.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/api-v1-jd-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/api-v1-jdf-40589.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdf-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/api-v1-jdq-40589.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdq-40589.json.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jd-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jd-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jdf-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdf-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jdq-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdq-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40945/api-v1-jd-40945.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40945/api-v1-jd-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40945/api-v1-jdf-40945.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdf-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40945/api-v1-jdq-40945.json.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdq-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/api-v1-jd-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/api-v1-jd-40966.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/api-v1-jdf-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdf-40966.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/api-v1-jdq-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdq-40966.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_42585/api-v1-jd-42585.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_42585/api-v1-jd-42585.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_42585/api-v1-jdf-42585.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdf-42585.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_42585/api-v1-jdq-42585.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdq-42585.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/data-v1-dl-52739.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/data-v1-dl-52739.arff.gz -------------------------------------------------------------------------------- /examples/model_selection/README.txt: -------------------------------------------------------------------------------- 1 | .. _model_selection_examples: 2 | 3 | Model Selection 4 | ----------------------- 5 | 6 | Examples related to the :mod:`sklearn.model_selection` module. 7 | -------------------------------------------------------------------------------- /examples/text/README.txt: -------------------------------------------------------------------------------- 1 | .. _text_examples: 2 | 3 | Working with text documents 4 | ---------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.feature_extraction.text` module. 
7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/data-v1-dl-4644182.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/data-v1-dl-4644182.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/data-v1-dl-4965250.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/data-v1-dl-4965250.arff.gz -------------------------------------------------------------------------------- /.binder/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn 2 | --pre 3 | matplotlib 4 | scikit-image 5 | pandas 6 | sphinx-gallery 7 | scikit-learn 8 | 9 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn-modern/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | pygments_style = default 4 | stylesheet = css/theme.css 5 | 6 | [options] 7 | google_analytics = true 8 | mathjax_path = 9 | -------------------------------------------------------------------------------- /examples/calibration/README.txt: -------------------------------------------------------------------------------- 1 | .. _calibration_examples: 2 | 3 | Calibration 4 | ----------------------- 5 | 6 | Examples illustrating the calibration of predicted probabilities of classifiers. 
7 | -------------------------------------------------------------------------------- /examples/feature_selection/README.txt: -------------------------------------------------------------------------------- 1 | .. _feature_selection_examples: 2 | 3 | Feature Selection 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.feature_selection` module. 7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40945/data-v1-dl-16826755.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40945/data-v1-dl-16826755.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/data-v1-dl-17928620.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/data-v1-dl-17928620.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_42585/data-v1-dl-21854866.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_42585/data-v1-dl-21854866.arff.gz -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = sklearn 4 | parallel = True 5 | omit = 6 | */sklearn/externals/* 7 | */sklearn/_build_utils/* 8 | */benchmarks/* 9 | **/setup.py 10 | -------------------------------------------------------------------------------- /doc/tutorial/common_includes/info.txt: 
-------------------------------------------------------------------------------- 1 | Meant to share common RST file snippets that we want to reuse by inclusion 2 | in the real tutorial in order to lower the maintenance burden 3 | of redundant sections. 4 | -------------------------------------------------------------------------------- /examples/cross_decomposition/README.txt: -------------------------------------------------------------------------------- 1 | .. _cross_decomposition_examples: 2 | 3 | Cross decomposition 4 | ------------------- 5 | 6 | Examples concerning the :mod:`sklearn.cross_decomposition` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/kernel_approximation/README.txt: -------------------------------------------------------------------------------- 1 | .. _kernel_approximation_examples: 2 | 3 | Kernel Approximation 4 | -------------------- 5 | 6 | Examples concerning the :mod:`sklearn.kernel_approximation` module. 7 | -------------------------------------------------------------------------------- /examples/release_highlights/README.txt: -------------------------------------------------------------------------------- 1 | .. _release_highlights_examples: 2 | 3 | Release Highlights 4 | ------------------ 5 | 6 | These examples illustrate the main features of the releases of scikit-learn. 
7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-s-act-.json.gz -------------------------------------------------------------------------------- 
/.github/labeler-file-extensions.yml: -------------------------------------------------------------------------------- 1 | cython: 2 | - sklearn/**/*.pyx 3 | - sklearn/**/*.pxd 4 | - sklearn/**/*.pxi 5 | # Tempita templates 6 | - sklearn/**/*.pyx.tp 7 | - sklearn/**/*.pxd.tp 8 | - sklearn/**/*.pxi.tp 9 | -------------------------------------------------------------------------------- /examples/semi_supervised/README.txt: -------------------------------------------------------------------------------- 1 | .. _semi_supervised_examples: 2 | 3 | Semi Supervised Classification 4 | ------------------------------ 5 | 6 | Examples concerning the :mod:`sklearn.semi_supervised` module. 7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /doc/templates/generate_deprecated.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for f in [^d]*; do (head -n2 < $f; echo ' 3 | .. meta:: 4 | :robots: noindex 5 | 6 | .. 
warning:: 7 | **DEPRECATED** 8 | '; tail -n+3 $f) > deprecated_$f; done 9 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-dv-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-dv-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /lgtm.yml: -------------------------------------------------------------------------------- 1 | extraction: 2 | cpp: 3 | before_index: 4 | - pip3 install numpy==1.16.3 5 | - pip3 install --no-deps scipy Cython 6 | index: 7 | build_command: 8 | - python3 setup.py build_ext 
-i 9 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /examples/gaussian_process/README.txt: -------------------------------------------------------------------------------- 1 | .. _gaussian_process_examples: 2 | 3 | Gaussian Process for Machine Learning 4 | ------------------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.gaussian_process` module. 
7 | 8 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/__init__.py: -------------------------------------------------------------------------------- 1 | """This module implements histogram-based gradient boosting estimators. 2 | 3 | The implementation is a port from pygbm which is itself strongly inspired 4 | from LightGBM. 5 | """ 6 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/libsvm_template.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* this is a hack to generate libsvm with both sparse and dense 3 | methods in the same binary*/ 4 | 5 | #define _DENSE_REP 6 | #include "svm.cpp" 7 | #undef _DENSE_REP 8 | #include "svm.cpp" 9 | -------------------------------------------------------------------------------- /examples/applications/README.txt: -------------------------------------------------------------------------------- 1 | .. _realworld_examples: 2 | 3 | Examples based on real world datasets 4 | ------------------------------------- 5 | 6 | Applications to real world problems with some medium sized datasets or 7 | interactive user interface. 8 | -------------------------------------------------------------------------------- /examples/compose/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_compose_examples: 2 | 3 | Pipelines and composite estimators 4 | ---------------------------------- 5 | 6 | Examples of how to compose transformers and pipelines from other estimators. See the :ref:`User Guide <combining_estimators>`. 7 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Documentation for scikit-learn 2 | 3 | This directory contains the full manual and website as displayed at 4 | http://scikit-learn.org. See 5 | http://scikit-learn.org/dev/developers/contributing.html#documentation for 6 | detailed information about the documentation. 7 | -------------------------------------------------------------------------------- /doc/modules/pipeline.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. raw:: html 4 | 5 | 6 | 9 | 10 | This content is now at :ref:`combining_estimators`. 11 | -------------------------------------------------------------------------------- /sklearn/utils/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import sklearn 4 | 5 | 6 | @pytest.fixture 7 | def print_changed_only_false(): 8 | sklearn.set_config(print_changed_only=False) 9 | yield 10 | sklearn.set_config(print_changed_only=True) # reset to default 11 | -------------------------------------------------------------------------------- /doc/testimonials/README.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | To find the list of people we contacted, see: 4 | https://docs.google.com/spreadsheet/ccc?key=0AhGnAxuBDhjmdDYwNzlZVE5SMkFsMjNBbGlaWkpNZ1E&usp=sharing 5 | 6 | To obtain access to this file, send an email to: 7 | nelle dot varoquaux at gmail dot com 8 | 9 | -------------------------------------------------------------------------------- /doc/binder/requirements.txt: 
-------------------------------------------------------------------------------- 1 | # A binder requirement file is required by sphinx-gallery. 2 | # We don't really need one since our binder requirement file lives in the 3 | # .binder directory. 4 | # This file can be removed if 'dependencies' is made an optional key for 5 | # binder in sphinx-gallery. 6 | -------------------------------------------------------------------------------- /doc/templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/_svm_cython_blas_helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef _SVM_CYTHON_BLAS_HELPERS_H 2 | #define _SVM_CYTHON_BLAS_HELPERS_H 3 | 4 | typedef double (*dot_func)(int, double*, int, double*, int); 5 | typedef struct BlasFunctions{ 6 | dot_func dot; 7 | } BlasFunctions; 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /doc/templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /sklearn/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.experimental` module provides importable modules that enable 3 | the use of experimental features or estimators. 4 | 5 | The features and estimators that are experimental aren't subject to 6 | deprecation cycles. Use them at your own risk! 7 | """ 8 | -------------------------------------------------------------------------------- /sklearn/mixture/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.mixture` module implements mixture modeling algorithms. 3 | """ 4 | 5 | from ._gaussian_mixture import GaussianMixture 6 | from ._bayesian_mixture import BayesianGaussianMixture 7 | 8 | 9 | __all__ = ["GaussianMixture", "BayesianGaussianMixture"] 10 | -------------------------------------------------------------------------------- /sklearn/externals/README: -------------------------------------------------------------------------------- 1 | This directory contains bundled external dependencies that are updated 2 | every once in a while. 3 | 4 | Note for distribution packagers: if you want to remove the duplicated 5 | code and depend on a packaged version, we suggest that you simply do a 6 | symbolic link in this directory. 
7 | 8 | -------------------------------------------------------------------------------- /doc/templates/numpydoc_docstring.rst: -------------------------------------------------------------------------------- 1 | {{index}} 2 | {{summary}} 3 | {{extended_summary}} 4 | {{parameters}} 5 | {{returns}} 6 | {{yields}} 7 | {{other_parameters}} 8 | {{attributes}} 9 | {{raises}} 10 | {{warns}} 11 | {{warnings}} 12 | {{see_also}} 13 | {{notes}} 14 | {{references}} 15 | {{examples}} 16 | {{methods}} 17 | -------------------------------------------------------------------------------- /sklearn/neighbors/_partition_nodes.pxd: -------------------------------------------------------------------------------- 1 | from ..utils._typedefs cimport DTYPE_t, ITYPE_t 2 | 3 | cdef int partition_node_indices( 4 | DTYPE_t *data, 5 | ITYPE_t *node_indices, 6 | ITYPE_t split_dim, 7 | ITYPE_t split_index, 8 | ITYPE_t n_features, 9 | ITYPE_t n_points) except -1 10 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_classification.txt: -------------------------------------------------------------------------------- 1 | # comment 2 | # note: the next line contains a tab 3 | 1.0 3:2.5 11:-5.2 16:1.5 # and an inline comment 4 | 2.0 6:1.0 13:-3 5 | # another comment 6 | 3.0 21:27 7 | 4.0 2:1.234567890123456e10 # double precision value 8 | 1.0 # empty line, all zeros 9 | 2.0 3:0 # explicit zeros 10 | -------------------------------------------------------------------------------- /sklearn/datasets/data/linnerud_exercise.csv: -------------------------------------------------------------------------------- 1 | Chins Situps Jumps 2 | 5 162 60 3 | 2 110 60 4 | 12 101 101 5 | 12 105 37 6 | 13 155 58 7 | 4 101 42 8 | 8 101 38 9 | 6 125 40 10 | 15 200 40 11 | 17 251 250 12 | 17 120 38 13 | 13 210 115 14 | 14 215 105 15 | 1 50 50 16 | 6 70 31 17 | 12 210 120 18 | 4 60 25 19 | 11 230 80 20 | 15 225 73 21 | 2 110 43 22 | 
-------------------------------------------------------------------------------- /sklearn/externals/conftest.py: -------------------------------------------------------------------------------- 1 | # Do not collect any tests in externals. This is more robust than using 2 | # --ignore because --ignore needs a path and it is not convenient to pass in 3 | # the externals path (very long install-dependent path in site-packages) when 4 | # using --pyargs 5 | def pytest_ignore_collect(path, config): 6 | return True 7 | 8 | -------------------------------------------------------------------------------- /sklearn/datasets/data/linnerud_physiological.csv: -------------------------------------------------------------------------------- 1 | Weight Waist Pulse 2 | 191 36 50 3 | 189 37 52 4 | 193 38 58 5 | 162 35 62 6 | 189 35 46 7 | 182 36 56 8 | 211 38 56 9 | 167 34 60 10 | 176 31 74 11 | 154 33 56 12 | 169 34 50 13 | 166 33 52 14 | 154 34 64 15 | 247 46 50 16 | 193 36 46 17 | 202 37 62 18 | 176 37 54 19 | 157 32 52 20 | 156 33 54 21 | 138 33 68 22 | -------------------------------------------------------------------------------- /sklearn/linear_model/_glm/__init__.py: -------------------------------------------------------------------------------- 1 | # License: BSD 3 clause 2 | 3 | from .glm import ( 4 | GeneralizedLinearRegressor, 5 | PoissonRegressor, 6 | GammaRegressor, 7 | TweedieRegressor, 8 | ) 9 | 10 | __all__ = [ 11 | "GeneralizedLinearRegressor", 12 | "PoissonRegressor", 13 | "GammaRegressor", 14 | "TweedieRegressor", 15 | ] 16 | -------------------------------------------------------------------------------- /sklearn/tests/test_check_build.py: -------------------------------------------------------------------------------- 1 | """ 2 | Smoke Test the check_build module 3 | """ 4 | 5 | # Author: G Varoquaux 6 | # License: BSD 3 clause 7 | 8 | import pytest 9 | 10 | from sklearn.__check_build import raise_build_error 11 | 12 | 13 | def test_raise_build_error(): 14 | 
with pytest.raises(ImportError): 15 | raise_build_error(ImportError()) 16 | -------------------------------------------------------------------------------- /sklearn/svm/_newrand.pyx: -------------------------------------------------------------------------------- 1 | """ 2 | Wrapper for newrand.h 3 | 4 | """ 5 | 6 | cdef extern from "newrand.h": 7 | void set_seed(unsigned int) 8 | unsigned int bounded_rand_int(unsigned int) 9 | 10 | def set_seed_wrap(unsigned int custom_seed): 11 | set_seed(custom_seed) 12 | 13 | def bounded_rand_int_wrap(unsigned int range_): 14 | return bounded_rand_int(range_) 15 | -------------------------------------------------------------------------------- /doc/templates/class_with_call.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}=============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __call__ 10 | {% endblock %} 11 | 12 | .. include:: {{module}}.{{objname}}.examples 13 | 14 | .. raw:: html 15 | 16 |
17 | -------------------------------------------------------------------------------- /doc/computing.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | ============================ 6 | Computing with scikit-learn 7 | ============================ 8 | 9 | .. include:: includes/big_toc_css.rst 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | computing/scaling_strategies 15 | computing/computational_performance 16 | computing/parallelism 17 | -------------------------------------------------------------------------------- /sklearn/neural_network/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.neural_network` module includes models based on neural 3 | networks. 4 | """ 5 | 6 | # License: BSD 3 clause 7 | 8 | from ._rbm import BernoulliRBM 9 | 10 | from ._multilayer_perceptron import MLPClassifier 11 | from ._multilayer_perceptron import MLPRegressor 12 | 13 | __all__ = ["BernoulliRBM", "MLPClassifier", "MLPRegressor"] 14 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | # Even if empty this file is useful so that when running from the root folder 2 | # ./sklearn is added to sys.path by pytest. See 3 | # https://docs.pytest.org/en/latest/explanation/pythonpath.html for more 4 | # details. For example, this allows to build extensions in place and run pytest 5 | # doc/modules/clustering.rst and use sklearn from the local folder rather than 6 | # the one from site-packages. 7 | -------------------------------------------------------------------------------- /doc/templates/deprecated_function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. 
meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autofunction:: {{ objname }} 14 | 15 | .. include:: {{module}}.{{objname}}.examples 16 | 17 | .. raw:: html 18 | 19 |
20 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class_without_init.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | .. include:: {{module}}.{{objname}}.examples 16 | 17 | .. raw:: html 18 | 19 |
20 | -------------------------------------------------------------------------------- /sklearn/_distributor_init.py: -------------------------------------------------------------------------------- 1 | """ Distributor init file 2 | 3 | Distributors: you can add custom code here to support particular distributions 4 | of scikit-learn. 5 | 6 | For example, this is a good place to put any checks for hardware requirements. 7 | 8 | The scikit-learn standard source distribution will not put code in this file, 9 | so you can safely replace this file with your own version. 10 | """ 11 | -------------------------------------------------------------------------------- /doc/model_selection.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. include:: includes/big_toc_css.rst 6 | 7 | .. _model_selection: 8 | 9 | Model selection and evaluation 10 | ------------------------------ 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | modules/cross_validation 16 | modules/grid_search 17 | modules/model_evaluation 18 | modules/learning_curve 19 | -------------------------------------------------------------------------------- /doc/developers/index.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. _developers_guide: 6 | 7 | ================= 8 | Developer's Guide 9 | ================= 10 | 11 | .. include:: ../includes/big_toc_css.rst 12 | .. include:: ../tune_toc.rst 13 | 14 | .. 
toctree:: 15 | 16 | contributing 17 | develop 18 | tips 19 | utilities 20 | performance 21 | advanced_installation 22 | bug_triaging 23 | maintainer 24 | plotting 25 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | {% block methods %} 16 | .. automethod:: __init__ 17 | {% endblock %} 18 | 19 | .. include:: {{module}}.{{objname}}.examples 20 | 21 | .. raw:: html 22 | 23 |
24 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn-modern/search.html: -------------------------------------------------------------------------------- 1 | {%- extends "basic/search.html" %} 2 | {% block extrahead %} 3 | 4 | 5 | 6 | 7 | 8 | {% endblock %} 9 | -------------------------------------------------------------------------------- /sklearn/impute/__init__.py: -------------------------------------------------------------------------------- 1 | """Transformers for missing value imputation""" 2 | import typing 3 | 4 | from ._base import MissingIndicator, SimpleImputer 5 | from ._knn import KNNImputer 6 | 7 | if typing.TYPE_CHECKING: 8 | # Avoid errors in type checkers (e.g. mypy) for experimental estimators. 9 | # TODO: remove this check once the estimator is no longer experimental. 10 | from ._iterative import IterativeImputer # noqa 11 | 12 | __all__ = ["MissingIndicator", "SimpleImputer", "KNNImputer"] 13 | -------------------------------------------------------------------------------- /sklearn/inspection/setup.py: -------------------------------------------------------------------------------- 1 | from numpy.distutils.misc_util import Configuration 2 | 3 | 4 | def configuration(parent_package="", top_path=None): 5 | config = Configuration("inspection", parent_package, top_path) 6 | 7 | config.add_subpackage("_plot") 8 | config.add_subpackage("_plot.tests") 9 | 10 | config.add_subpackage("tests") 11 | 12 | return config 13 | 14 | 15 | if __name__ == "__main__": 16 | from numpy.distutils.core import setup 17 | 18 | setup(**configuration().todict()) 19 | -------------------------------------------------------------------------------- /doc/contents.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | .. include:: tune_toc.rst 3 | 4 | .. 
Places global toc into the sidebar 5 | 6 | :globalsidebartoc: True 7 | 8 | ================= 9 | Table Of Contents 10 | ================= 11 | 12 | .. Define an order for the Table of Contents: 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | 17 | preface 18 | tutorial/index 19 | getting_started 20 | user_guide 21 | glossary 22 | auto_examples/index 23 | modules/classes 24 | developers/index 25 | -------------------------------------------------------------------------------- /sklearn/semi_supervised/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.semi_supervised` module implements semi-supervised learning 3 | algorithms. These algorithms utilize small amounts of labeled data and large 4 | amounts of unlabeled data for classification tasks. This module includes Label 5 | Propagation. 6 | """ 7 | 8 | from ._label_propagation import LabelPropagation, LabelSpreading 9 | from ._self_training import SelfTrainingClassifier 10 | 11 | __all__ = ["SelfTrainingClassifier", "LabelPropagation", "LabelSpreading"] 12 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class_with_call.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}=============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | {% block methods %} 16 | .. automethod:: __init__ 17 | .. automethod:: __call__ 18 | {% endblock %} 19 | 20 | .. include:: {{module}}.{{objname}}.examples 21 | 22 | .. raw:: html 23 | 24 |
25 | -------------------------------------------------------------------------------- /sklearn/neighbors/tests/test_kd_tree.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from sklearn.neighbors._kd_tree import KDTree 5 | 6 | DIMENSION = 3 7 | 8 | METRICS = {"euclidean": {}, "manhattan": {}, "chebyshev": {}, "minkowski": dict(p=3)} 9 | 10 | 11 | def test_array_object_type(): 12 | """Check that we do not accept object dtype array.""" 13 | X = np.array([(1, 2, 3), (2, 5), (5, 5, 1, 2)], dtype=object) 14 | with pytest.raises(ValueError, match="setting an array element with a sequence"): 15 | KDTree(X) 16 | -------------------------------------------------------------------------------- /sklearn/svm/src/liblinear/_cython_blas_helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef _CYTHON_BLAS_HELPERS_H 2 | #define _CYTHON_BLAS_HELPERS_H 3 | 4 | typedef double (*dot_func)(int, double*, int, double*, int); 5 | typedef void (*axpy_func)(int, double, double*, int, double*, int); 6 | typedef void (*scal_func)(int, double, double*, int); 7 | typedef double (*nrm2_func)(int, double*, int); 8 | 9 | typedef struct BlasFunctions{ 10 | dot_func dot; 11 | axpy_func axpy; 12 | scal_func scal; 13 | nrm2_func nrm2; 14 | } BlasFunctions; 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /sklearn/experimental/tests/test_enable_hist_gradient_boosting.py: -------------------------------------------------------------------------------- 1 | """Tests for making sure experimental imports work as expected.""" 2 | 3 | import textwrap 4 | 5 | from sklearn.utils._testing import assert_run_python_script 6 | 7 | 8 | def test_import_raises_warning(): 9 | code = """ 10 | import pytest 11 | with pytest.warns(UserWarning, match="it is not needed to import"): 12 | from sklearn.experimental import 
enable_hist_gradient_boosting # noqa 13 | """ 14 | assert_run_python_script(textwrap.dedent(code)) 15 | -------------------------------------------------------------------------------- /sklearn/inspection/__init__.py: -------------------------------------------------------------------------------- 1 | """The :mod:`sklearn.inspection` module includes tools for model inspection.""" 2 | 3 | 4 | from ._permutation_importance import permutation_importance 5 | 6 | from ._partial_dependence import partial_dependence 7 | from ._plot.partial_dependence import plot_partial_dependence 8 | from ._plot.partial_dependence import PartialDependenceDisplay 9 | 10 | 11 | __all__ = [ 12 | "partial_dependence", 13 | "plot_partial_dependence", 14 | "permutation_importance", 15 | "PartialDependenceDisplay", 16 | ] 17 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.feature_extraction` module deals with feature extraction 3 | from raw data. It currently includes methods to extract features from text and 4 | images. 5 | """ 6 | 7 | from ._dict_vectorizer import DictVectorizer 8 | from ._hash import FeatureHasher 9 | from .image import img_to_graph, grid_to_graph 10 | from . import text 11 | 12 | __all__ = [ 13 | "DictVectorizer", 14 | "image", 15 | "img_to_graph", 16 | "grid_to_graph", 17 | "text", 18 | "FeatureHasher", 19 | ] 20 | -------------------------------------------------------------------------------- /doc/unsupervised_learning.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. include:: includes/big_toc_css.rst 6 | 7 | .. _unsupervised-learning: 8 | 9 | Unsupervised learning 10 | ----------------------- 11 | 12 | .. 
toctree:: 13 | :maxdepth: 2 14 | 15 | modules/mixture 16 | modules/manifold 17 | modules/clustering 18 | modules/biclustering 19 | modules/decomposition 20 | modules/covariance 21 | modules/outlier_detection 22 | modules/density 23 | modules/neural_networks_unsupervised 24 | -------------------------------------------------------------------------------- /sklearn/utils/_typedefs.pxd: -------------------------------------------------------------------------------- 1 | #!python 2 | cimport numpy as np 3 | 4 | # Floating point/data type 5 | ctypedef np.float64_t DTYPE_t # WARNING: should match DTYPE in typedefs.pyx 6 | 7 | cdef enum: 8 | DTYPECODE = np.NPY_FLOAT64 9 | ITYPECODE = np.NPY_INTP 10 | 11 | # Index/integer type. 12 | # WARNING: ITYPE_t must be a signed integer type or you will have a bad time! 13 | ctypedef np.intp_t ITYPE_t # WARNING: should match ITYPE in typedefs.pyx 14 | 15 | # Fused type for certain operations 16 | ctypedef fused DITYPE_t: 17 | ITYPE_t 18 | DTYPE_t 19 | -------------------------------------------------------------------------------- /sklearn/tests/test_init.py: -------------------------------------------------------------------------------- 1 | # Basic unittests to test functioning of module's top-level 2 | 3 | 4 | __author__ = "Yaroslav Halchenko" 5 | __license__ = "BSD" 6 | 7 | 8 | try: 9 | from sklearn import * # noqa 10 | 11 | _top_import_error = None 12 | except Exception as e: 13 | _top_import_error = e 14 | 15 | 16 | def test_import_skl(): 17 | # Test either above import has failed for some reason 18 | # "import *" is discouraged outside of the module level, hence we 19 | # rely on setting up the variable above 20 | assert _top_import_error is None 21 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_arpack.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from numpy.testing import assert_allclose 3 | 4 | from 
sklearn.utils import check_random_state 5 | from sklearn.utils._arpack import _init_arpack_v0 6 | 7 | 8 | @pytest.mark.parametrize("seed", range(100)) 9 | def test_init_arpack_v0(seed): 10 | # check that the initialization a sampling from an uniform distribution 11 | # where we can fix the random state 12 | size = 1000 13 | v0 = _init_arpack_v0(size, seed) 14 | 15 | rng = check_random_state(seed) 16 | assert_allclose(v0, rng.uniform(-1, 1, size=size)) 17 | -------------------------------------------------------------------------------- /.github/workflows/check-manifest.yml: -------------------------------------------------------------------------------- 1 | name: "Check Manifest" 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | 7 | jobs: 8 | check: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - uses: actions/setup-python@v2 13 | with: 14 | python-version: '3.9' 15 | - name: Install dependencies 16 | # scipy and cython are required to build sdist 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install check-manifest scipy cython 20 | - run: | 21 | check-manifest -v 22 | -------------------------------------------------------------------------------- /sklearn/compose/__init__.py: -------------------------------------------------------------------------------- 1 | """Meta-estimators for building composite models with transformers 2 | 3 | In addition to its current contents, this module will eventually be home to 4 | refurbished versions of Pipeline and FeatureUnion. 
5 | 6 | """ 7 | 8 | from ._column_transformer import ( 9 | ColumnTransformer, 10 | make_column_transformer, 11 | make_column_selector, 12 | ) 13 | from ._target import TransformedTargetRegressor 14 | 15 | 16 | __all__ = [ 17 | "ColumnTransformer", 18 | "make_column_transformer", 19 | "TransformedTargetRegressor", 20 | "make_column_selector", 21 | ] 22 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | | Version | Supported | 6 | | --------- | ------------------ | 7 | | 1.0.1 | :white_check_mark: | 8 | | < 1.0.1 | :x: | 9 | 10 | ## Reporting a Vulnerability 11 | 12 | Please report security vulnerabilities by email to `security@scikit-learn.org`. 13 | This email is an alias to a subset of the scikit-learn maintainers' team. 14 | 15 | If the security vulnerability is accepted, a patch will be crafted privately 16 | in order to prepare a dedicated bugfix release as timely as possible (depending 17 | on the complexity of the fix). 18 | -------------------------------------------------------------------------------- /doc/communication_team.rst: -------------------------------------------------------------------------------- 1 | .. raw :: html 2 | 3 | 4 |
5 | 8 |
9 |
10 |

Reshama Shaikh

11 |
12 |
13 |
14 |

Lauren Burke

15 |
16 |
17 | -------------------------------------------------------------------------------- /doc/whats_new/changelog_legend.inc: -------------------------------------------------------------------------------- 1 | Legend for changelogs 2 | --------------------- 3 | 4 | - |MajorFeature|: something big that you couldn't do before. 5 | - |Feature|: something that you couldn't do before. 6 | - |Efficiency|: an existing feature now may not require as much computation or 7 | memory. 8 | - |Enhancement|: a miscellaneous minor improvement. 9 | - |Fix|: something that previously didn't work as documented -- or according 10 | to reasonable expectations -- should now work. 11 | - |API|: you will need to change your code to have the same effect in the 12 | future; or a feature will be removed in the future. 13 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/conftest.py: -------------------------------------------------------------------------------- 1 | """ Network tests are only run if data is already locally available, 2 | or if download is specifically requested by environment variable.""" 3 | import builtins 4 | import pytest 5 | 6 | 7 | @pytest.fixture 8 | def hide_available_pandas(monkeypatch): 9 | """Pretend pandas was not installed.""" 10 | import_orig = builtins.__import__ 11 | 12 | def mocked_import(name, *args, **kwargs): 13 | if name == "pandas": 14 | raise ImportError() 15 | return import_orig(name, *args, **kwargs) 16 | 17 | monkeypatch.setattr(builtins, "__import__", mocked_import) 18 | -------------------------------------------------------------------------------- /.github/workflows/unassign.yml: -------------------------------------------------------------------------------- 1 | name: Unassign 2 | #Runs when a contributor has unassigned themselves from the issue and adds 'help wanted' 3 | on: 4 | issues: 5 | types: unassigned 6 | 7 | jobs: 8 | one: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: 12 | 
if: github.event.issue.state == 'open' 13 | run: | 14 | echo "Marking issue ${{ github.event.issue.number }} as help wanted" 15 | curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"labels": ["help wanted"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels 16 | -------------------------------------------------------------------------------- /sklearn/gaussian_process/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Author: Jan Hendrik Metzen 4 | # Vincent Dubourg 5 | # (mostly translation, see implementation details) 6 | # License: BSD 3 clause 7 | 8 | """ 9 | The :mod:`sklearn.gaussian_process` module implements Gaussian Process 10 | based regression and classification. 11 | """ 12 | 13 | from ._gpr import GaussianProcessRegressor 14 | from ._gpc import GaussianProcessClassifier 15 | from . import kernels 16 | 17 | 18 | __all__ = ["GaussianProcessRegressor", "GaussianProcessClassifier", "kernels"] 19 | -------------------------------------------------------------------------------- /sklearn/manifold/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.manifold` module implements data embedding techniques. 
3 | """ 4 | 5 | from ._locally_linear import locally_linear_embedding, LocallyLinearEmbedding 6 | from ._isomap import Isomap 7 | from ._mds import MDS, smacof 8 | from ._spectral_embedding import SpectralEmbedding, spectral_embedding 9 | from ._t_sne import TSNE, trustworthiness 10 | 11 | __all__ = [ 12 | "locally_linear_embedding", 13 | "LocallyLinearEmbedding", 14 | "Isomap", 15 | "MDS", 16 | "smacof", 17 | "SpectralEmbedding", 18 | "spectral_embedding", 19 | "TSNE", 20 | "trustworthiness", 21 | ] 22 | -------------------------------------------------------------------------------- /sklearn/__check_build/setup.py: -------------------------------------------------------------------------------- 1 | # Author: Virgile Fritsch 2 | # License: BSD 3 clause 3 | 4 | import numpy 5 | 6 | 7 | def configuration(parent_package="", top_path=None): 8 | from numpy.distutils.misc_util import Configuration 9 | 10 | config = Configuration("__check_build", parent_package, top_path) 11 | config.add_extension( 12 | "_check_build", sources=["_check_build.pyx"], include_dirs=[numpy.get_include()] 13 | ) 14 | 15 | return config 16 | 17 | 18 | if __name__ == "__main__": 19 | from numpy.distutils.core import setup 20 | 21 | setup(**configuration(top_path="").todict()) 22 | -------------------------------------------------------------------------------- /sklearn/preprocessing/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def configuration(parent_package="", top_path=None): 5 | import numpy 6 | from numpy.distutils.misc_util import Configuration 7 | 8 | config = Configuration("preprocessing", parent_package, top_path) 9 | libraries = [] 10 | if os.name == "posix": 11 | libraries.append("m") 12 | 13 | config.add_extension( 14 | "_csr_polynomial_expansion", 15 | sources=["_csr_polynomial_expansion.pyx"], 16 | include_dirs=[numpy.get_include()], 17 | libraries=libraries, 18 | ) 19 | 20 | config.add_subpackage("tests") 21 | 
22 | return config 23 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/psf/black 9 | rev: 21.6b0 10 | hooks: 11 | - id: black 12 | - repo: https://gitlab.com/pycqa/flake8 13 | rev: 3.9.2 14 | hooks: 15 | - id: flake8 16 | types: [file, python] 17 | - repo: https://github.com/pre-commit/mirrors-mypy 18 | rev: v0.782 19 | hooks: 20 | - id: mypy 21 | files: sklearn/ 22 | additional_dependencies: [pytest==6.2.4] 23 | -------------------------------------------------------------------------------- /.github/workflows/labeler-title-regex.yml: -------------------------------------------------------------------------------- 1 | name: Pull Request Regex Title Labeler 2 | on: 3 | pull_request_target: 4 | types: [opened, edited] 5 | 6 | permissions: 7 | contents: read 8 | pull-requests: write 9 | 10 | jobs: 11 | 12 | labeler: 13 | runs-on: ubuntu-20.04 14 | steps: 15 | - uses: actions/checkout@v2 16 | - uses: actions/setup-python@v2 17 | with: 18 | python-version: '3.9' 19 | - name: Install PyGithub 20 | run: pip install -Uq PyGithub 21 | - name: Label pull request 22 | run: python .github/scripts/label_title_regex.py 23 | env: 24 | CONTEXT_GITHUB: ${{ toJson(github) }} 25 | -------------------------------------------------------------------------------- /doc/user_guide.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. title:: User guide: contents 6 | 7 | .. _user_guide: 8 | 9 | ========== 10 | User Guide 11 | ========== 12 | 13 | .. include:: includes/big_toc_css.rst 14 | 15 | .. nice layout in the toc 16 | 17 | .. 
include:: tune_toc.rst 18 | 19 | .. toctree:: 20 | :numbered: 21 | :maxdepth: 3 22 | 23 | supervised_learning.rst 24 | unsupervised_learning.rst 25 | model_selection.rst 26 | inspection.rst 27 | visualizations.rst 28 | data_transforms.rst 29 | datasets.rst 30 | computing.rst 31 | modules/model_persistence.rst 32 | common_pitfalls.rst 33 | -------------------------------------------------------------------------------- /sklearn/utils/_fast_dict.pxd: -------------------------------------------------------------------------------- 1 | # Author: Gael Varoquaux 2 | # License: BSD 3 | """ 4 | Uses C++ map containers for fast dict-like behavior with keys being 5 | integers, and values float. 6 | """ 7 | 8 | from libcpp.map cimport map as cpp_map 9 | 10 | # Import the C-level symbols of numpy 11 | cimport numpy as np 12 | 13 | ctypedef np.float64_t DTYPE_t 14 | 15 | ctypedef np.intp_t ITYPE_t 16 | 17 | ############################################################################### 18 | # An object to be used in Python 19 | 20 | cdef class IntFloatDict: 21 | cdef cpp_map[ITYPE_t, DTYPE_t] my_map 22 | cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values) 23 | -------------------------------------------------------------------------------- /doc/templates/redirects.html: -------------------------------------------------------------------------------- 1 | {% set redirect = pathto(redirects[pagename]) %} 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | scikit-learn: machine learning in Python 11 | 12 | 13 |

You will be automatically redirected to the new location of this page.

14 | 15 | 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/doc_improvement.yml: -------------------------------------------------------------------------------- 1 | name: Documentation improvement 2 | description: Create a report to help us improve the documentation. Alternatively you can just open a pull request with the suggested change. 3 | labels: [Documentation] 4 | 5 | body: 6 | - type: textarea 7 | attributes: 8 | label: Describe the issue linked to the documentation 9 | description: > 10 | Tell us about the confusion introduced in the documentation. 11 | validations: 12 | required: true 13 | - type: textarea 14 | attributes: 15 | label: Suggest a potential alternative/fix 16 | description: > 17 | Tell us how we could improve the documentation in this regard. 18 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/.gitignore: -------------------------------------------------------------------------------- 1 | # cruft 2 | .*.swp 3 | *.pyc 4 | .DS_Store 5 | *.pdf 6 | 7 | # folder to be used for working on the exercises 8 | workspace 9 | 10 | # output of the sphinx build of the documentation 11 | tutorial/_build 12 | 13 | # datasets to be fetched from the web and cached locally 14 | data/twenty_newsgroups/20news-bydate.tar.gz 15 | data/twenty_newsgroups/20news-bydate-train 16 | data/twenty_newsgroups/20news-bydate-test 17 | 18 | data/movie_reviews/txt_sentoken 19 | data/movie_reviews/poldata.README.2.0 20 | 21 | data/languages/paragraphs 22 | data/languages/short_paragraphs 23 | data/languages/html 24 | 25 | data/labeled_faces_wild/lfw_preprocessed/ 26 | -------------------------------------------------------------------------------- /doc/preface.rst: -------------------------------------------------------------------------------- 1 | .. This helps define the TOC ordering for "about us" sections. 
Particularly 2 | useful for PDF output as this section is not linked from elsewhere. 3 | 4 | .. Places global toc into the sidebar 5 | 6 | :globalsidebartoc: True 7 | 8 | .. _preface_menu: 9 | 10 | .. include:: includes/big_toc_css.rst 11 | .. include:: tune_toc.rst 12 | 13 | ======================= 14 | Welcome to scikit-learn 15 | ======================= 16 | 17 | | 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | 22 | install 23 | faq 24 | support 25 | related_projects 26 | about 27 | testimonials/testimonials 28 | whats_new 29 | roadmap 30 | governance 31 | 32 | | 33 | -------------------------------------------------------------------------------- /sklearn/neighbors/_distance_metric.py: -------------------------------------------------------------------------------- 1 | # TODO: Remove this file in 1.3 2 | import warnings 3 | 4 | from ..metrics import DistanceMetric as _DistanceMetric 5 | 6 | 7 | class DistanceMetric(_DistanceMetric): 8 | @classmethod 9 | def _warn(cls): 10 | warnings.warn( 11 | "sklearn.neighbors.DistanceMetric has been moved " 12 | "to sklearn.metrics.DistanceMetric in 1.0. " 13 | "This import path will be removed in 1.3", 14 | category=FutureWarning, 15 | ) 16 | 17 | @classmethod 18 | def get_metric(cls, metric, **kwargs): 19 | DistanceMetric._warn() 20 | return _DistanceMetric.get_metric(metric, **kwargs) 21 | -------------------------------------------------------------------------------- /sklearn/tree/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.tree` module includes decision tree-based models for 3 | classification and regression. 
4 | """ 5 | 6 | from ._classes import BaseDecisionTree 7 | from ._classes import DecisionTreeClassifier 8 | from ._classes import DecisionTreeRegressor 9 | from ._classes import ExtraTreeClassifier 10 | from ._classes import ExtraTreeRegressor 11 | from ._export import export_graphviz, plot_tree, export_text 12 | 13 | __all__ = [ 14 | "BaseDecisionTree", 15 | "DecisionTreeClassifier", 16 | "DecisionTreeRegressor", 17 | "ExtraTreeClassifier", 18 | "ExtraTreeRegressor", 19 | "export_graphviz", 20 | "plot_tree", 21 | "export_text", 22 | ] 23 | -------------------------------------------------------------------------------- /sklearn/linear_model/_sgd_fast_helpers.h: -------------------------------------------------------------------------------- 1 | // We cannot directly reuse the npy_isfinite from npy_math.h as numpy 2 | // and scikit-learn are not necessarily built with the same compiler. 3 | // When re-declaring the functions in the template for cython 4 | // specific for each parameter input type, it needs to be 2 different functions 5 | // as cython doesn't support function overloading. 6 | #ifdef _MSC_VER 7 | # include <float.h> 8 | # define skl_isfinite _finite 9 | # define skl_isfinite32 _finite 10 | # define skl_isfinite64 _finite 11 | #else 12 | # include <numpy/npy_math.h> 13 | # define skl_isfinite npy_isfinite 14 | # define skl_isfinite32 npy_isfinite 15 | # define skl_isfinite64 npy_isfinite 16 | #endif 17 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/LIBSVM_CHANGES: -------------------------------------------------------------------------------- 1 | Changes to Libsvm 2 | 3 | This is here mainly as checklist for incorporation of new versions of libsvm. 4 | 5 | * Add copyright to files svm.cpp and svm.h 6 | * Add random_seed support and call to srand in fit function 7 | * Improved random number generator (fix on windows, enhancement on other 8 | platforms). 
See 9 | * invoke scipy blas api for svm kernel function to improve performance with speedup rate of 1.5X to 2X for dense data only. See 10 | The changes made with respect to upstream are detailed in the heading of svm.cpp 11 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | We are a community based on openness, as well as friendly and didactic discussions. 4 | 5 | We aspire to treat everybody equally, and value their contributions. 6 | 7 | Decisions are made based on technical merit and consensus. 8 | 9 | Code is not the only way to help the project. Reviewing pull requests, 10 | answering questions to help others on mailing lists or issues, organizing and 11 | teaching tutorials, working on the website, improving the documentation, are 12 | all priceless contributions. 13 | 14 | We abide by the principles of openness, respect, and consideration of others of 15 | the Python Software Foundation: https://www.python.org/psf/codeofconduct/ 16 | 17 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | 4 | 5 | def configuration(parent_package="", top_path=None): 6 | import numpy 7 | from numpy.distutils.misc_util import Configuration 8 | 9 | config = Configuration("feature_extraction", parent_package, top_path) 10 | libraries = [] 11 | if os.name == "posix": 12 | libraries.append("m") 13 | 14 | if platform.python_implementation() != "PyPy": 15 | config.add_extension( 16 | "_hashing_fast", 17 | sources=["_hashing_fast.pyx"], 18 | include_dirs=[numpy.get_include()], 19 | libraries=libraries, 20 | ) 21 | config.add_subpackage("tests") 22 | 23 | return config 24 | 
-------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: ['https://numfocus.org/donate-to-scikit-learn'] 13 | -------------------------------------------------------------------------------- /doc/authors_emeritus.rst: -------------------------------------------------------------------------------- 1 | - Mathieu Blondel 2 | - Matthieu Brucher 3 | - Lars Buitinck 4 | - David Cournapeau 5 | - Noel Dawe 6 | - Vincent Dubourg 7 | - Edouard Duchesnay 8 | - Alexander Fabisch 9 | - Virgile Fritsch 10 | - Satrajit Ghosh 11 | - Angel Soler Gollonet 12 | - Chris Gorgolewski 13 | - Jaques Grobler 14 | - Brian Holt 15 | - Arnaud Joly 16 | - Thouis (Ray) Jones 17 | - Kyle Kastner 18 | - manoj kumar 19 | - Robert Layton 20 | - Wei Li 21 | - Paolo Losi 22 | - Gilles Louppe 23 | - Vincent Michel 24 | - Jarrod Millman 25 | - Alexandre Passos 26 | - Fabian Pedregosa 27 | - Peter Prettenhofer 28 | - (Venkat) Raghav, Rajagopalan 29 | - Jacob Schreiber 30 | - Du Shiqiao 31 | - Jake Vanderplas 32 | - David Warde-Farley 33 | - Ron Weiss 34 | -------------------------------------------------------------------------------- /sklearn/svm/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.svm` module includes Support Vector Machine algorithms. 3 | """ 4 | 5 | # See http://scikit-learn.sourceforge.net/modules/svm.html for complete 6 | # documentation. 7 | 8 | # Author: Fabian Pedregosa with help from 9 | # the scikit-learn community. LibSVM and LibLinear are copyright 10 | # of their respective owners. 11 | # License: BSD 3 clause (C) INRIA 2010 12 | 13 | from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, LinearSVR 14 | from ._bounds import l1_min_c 15 | 16 | __all__ = [ 17 | "LinearSVC", 18 | "LinearSVR", 19 | "NuSVC", 20 | "NuSVR", 21 | "OneClassSVM", 22 | "SVC", 23 | "SVR", 24 | "l1_min_c", 25 | ] 26 | -------------------------------------------------------------------------------- /.github/scripts/label_title_regex.py: -------------------------------------------------------------------------------- 1 | """Labels PRs based on title. Must be run in a github action with the 2 | pull_request_target event.""" 3 | from github import Github 4 | import os 5 | import json 6 | import re 7 | 8 | context_dict = json.loads(os.getenv("CONTEXT_GITHUB")) 9 | 10 | repo = context_dict["repository"] 11 | g = Github(context_dict["token"]) 12 | repo = g.get_repo(repo) 13 | pr_number = context_dict["event"]["number"] 14 | issue = repo.get_issue(number=pr_number) 15 | title = issue.title 16 | 17 | 18 | regex_to_labels = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")] 19 | 20 | labels_to_add = [label for regex, label in regex_to_labels if re.search(regex, title)] 21 | 22 | if labels_to_add: 23 | issue.add_to_labels(*labels_to_add) 24 | -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd: -------------------------------------------------------------------------------- 1 | from .common cimport X_BINNED_DTYPE_C 2 | from .common cimport BITSET_DTYPE_C 3 
| from .common cimport BITSET_INNER_DTYPE_C 4 | from .common cimport X_DTYPE_C 5 | 6 | cdef void init_bitset(BITSET_DTYPE_C bitset) nogil 7 | 8 | cdef void set_bitset(BITSET_DTYPE_C bitset, X_BINNED_DTYPE_C val) nogil 9 | 10 | cdef unsigned char in_bitset(BITSET_DTYPE_C bitset, X_BINNED_DTYPE_C val) nogil 11 | 12 | cpdef unsigned char in_bitset_memoryview(const BITSET_INNER_DTYPE_C[:] bitset, 13 | X_BINNED_DTYPE_C val) nogil 14 | 15 | cdef unsigned char in_bitset_2d_memoryview( 16 | const BITSET_INNER_DTYPE_C [:, :] bitset, 17 | X_BINNED_DTYPE_C val, 18 | unsigned int row) nogil 19 | -------------------------------------------------------------------------------- /sklearn/metrics/cluster/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | from numpy.distutils.misc_util import Configuration 5 | 6 | 7 | def configuration(parent_package="", top_path=None): 8 | config = Configuration("cluster", parent_package, top_path) 9 | libraries = [] 10 | if os.name == "posix": 11 | libraries.append("m") 12 | config.add_extension( 13 | "_expected_mutual_info_fast", 14 | sources=["_expected_mutual_info_fast.pyx"], 15 | include_dirs=[numpy.get_include()], 16 | libraries=libraries, 17 | ) 18 | 19 | config.add_subpackage("tests") 20 | 21 | return config 22 | 23 | 24 | if __name__ == "__main__": 25 | from numpy.distutils.core import setup 26 | 27 | setup(**configuration().todict()) 28 | -------------------------------------------------------------------------------- /sklearn/model_selection/tests/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common utilities for testing model selection. 
3 | """ 4 | 5 | import numpy as np 6 | 7 | from sklearn.model_selection import KFold 8 | 9 | 10 | class OneTimeSplitter: 11 | """A wrapper to make KFold single entry cv iterator""" 12 | 13 | def __init__(self, n_splits=4, n_samples=99): 14 | self.n_splits = n_splits 15 | self.n_samples = n_samples 16 | self.indices = iter(KFold(n_splits=n_splits).split(np.ones(n_samples))) 17 | 18 | def split(self, X=None, y=None, groups=None): 19 | """Split can be called only once""" 20 | for index in self.indices: 21 | yield index 22 | 23 | def get_n_splits(self, X=None, y=None, groups=None): 24 | return self.n_splits 25 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Since git version 2.23, git-blame has a feature to ignore 2 | # certain commits. 3 | # 4 | # This file contains a list of commits that are not likely what 5 | # you are looking for in `git blame`. You can set this file as 6 | # a default ignore file for blame by running the following 7 | # command. 8 | # 9 | # $ git config blame.ignoreRevsFile .git-blame-ignore-revs 10 | 11 | # PR 18948: Migrate code style to Black 12 | 82df48934eba1df9a1ed3be98aaace8eada59e6e 13 | 14 | # PR 20294: Use target_version >= 3.7 in Black 15 | 351ace7935a4ea685171cc6d174890f08facd561 16 | 17 | # PR 20412: Use experimental_string_processing=true in Black 18 | 3ae7c7615343bbd36acece57825d8b0d70fd9da4 19 | 20 | # PR 20502: Runs Black on examples 21 | 70a185ae59b4362633d18b0d0083abb1b6f7370c 22 | -------------------------------------------------------------------------------- /doc/supervised_learning.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. include:: includes/big_toc_css.rst 6 | 7 | .. _supervised-learning: 8 | 9 | Supervised learning 10 | ----------------------- 11 | 12 | .. 
toctree:: 13 | :maxdepth: 2 14 | 15 | modules/linear_model 16 | modules/lda_qda.rst 17 | modules/kernel_ridge.rst 18 | modules/svm 19 | modules/sgd 20 | modules/neighbors 21 | modules/gaussian_process 22 | modules/cross_decomposition.rst 23 | modules/naive_bayes 24 | modules/tree 25 | modules/ensemble 26 | modules/multiclass 27 | modules/feature_selection.rst 28 | modules/semi_supervised.rst 29 | modules/isotonic.rst 30 | modules/calibration.rst 31 | modules/neural_networks_supervised 32 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_weight_vector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from sklearn.utils._weight_vector import ( 4 | WeightVector32, 5 | WeightVector64, 6 | ) 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "dtype, WeightVector", 11 | [ 12 | (np.float32, WeightVector32), 13 | (np.float64, WeightVector64), 14 | ], 15 | ) 16 | def test_type_invariance(dtype, WeightVector): 17 | """Check the `dtype` consistency of `WeightVector`.""" 18 | weights = np.random.rand(100).astype(dtype) 19 | average_weights = np.random.rand(100).astype(dtype) 20 | 21 | weight_vector = WeightVector(weights, average_weights) 22 | 23 | assert np.asarray(weight_vector.w).dtype is np.dtype(dtype) 24 | assert np.asarray(weight_vector.aw).dtype is np.dtype(dtype) 25 | -------------------------------------------------------------------------------- /sklearn/experimental/enable_iterative_imputer.py: -------------------------------------------------------------------------------- 1 | """Enables IterativeImputer 2 | 3 | The API and results of this estimator might change without any deprecation 4 | cycle. 
5 | 6 | Importing this file dynamically sets :class:`~sklearn.impute.IterativeImputer` 7 | as an attribute of the impute module:: 8 | 9 | >>> # explicitly require this experimental feature 10 | >>> from sklearn.experimental import enable_iterative_imputer # noqa 11 | >>> # now you can import normally from impute 12 | >>> from sklearn.impute import IterativeImputer 13 | """ 14 | 15 | from ..impute._iterative import IterativeImputer 16 | from .. import impute 17 | 18 | # use setattr to avoid mypy errors when monkeypatching 19 | setattr(impute, "IterativeImputer", IterativeImputer) 20 | impute.__all__ += ["IterativeImputer"] 21 | -------------------------------------------------------------------------------- /.github/workflows/labeler-module.yml: -------------------------------------------------------------------------------- 1 | name: "Pull Request Labeler" 2 | on: pull_request_target 3 | 4 | jobs: 5 | triage: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: thomasjpfan/labeler@v2.5.0 9 | continue-on-error: true 10 | if: github.repository == 'scikit-learn/scikit-learn' 11 | with: 12 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 13 | max-labels: "3" 14 | configuration-path: ".github/labeler-module.yml" 15 | 16 | triage_file_extensions: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: thomasjpfan/labeler@v2.5.0 20 | continue-on-error: true 21 | if: github.repository == 'scikit-learn/scikit-learn' 22 | with: 23 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 24 | configuration-path: ".github/labeler-file-extensions.yml" -------------------------------------------------------------------------------- /sklearn/cluster/_k_means_common.pxd: -------------------------------------------------------------------------------- 1 | from cython cimport floating 2 | cimport numpy as np 3 | 4 | 5 | cdef floating _euclidean_dense_dense(floating*, floating*, int, bint) nogil 6 | 7 | cdef floating _euclidean_sparse_dense(floating[::1], int[::1], floating[::1], 8 | floating, bint) nogil 
9 | 10 | cpdef void _relocate_empty_clusters_dense( 11 | floating[:, ::1], floating[::1], floating[:, ::1], 12 | floating[:, ::1], floating[::1], int[::1]) 13 | 14 | cpdef void _relocate_empty_clusters_sparse( 15 | floating[::1], int[::1], int[::1], floating[::1], floating[:, ::1], 16 | floating[:, ::1], floating[::1], int[::1]) 17 | 18 | cdef void _average_centers(floating[:, ::1], floating[::1]) 19 | 20 | cdef void _center_shift(floating[:, ::1], floating[:, ::1], floating[::1]) 21 | -------------------------------------------------------------------------------- /sklearn/datasets/images/README.txt: -------------------------------------------------------------------------------- 1 | Image: china.jpg 2 | Released under a creative commons license. [1] 3 | Attribution: Some rights reserved by danielbuechele [2] 4 | Retrieved 21st August, 2011 from [3] by Robert Layton 5 | 6 | [1] https://creativecommons.org/licenses/by/2.0/ 7 | [2] https://www.flickr.com/photos/danielbuechele/ 8 | [3] https://www.flickr.com/photos/danielbuechele/6061409035/sizes/z/in/photostream/ 9 | 10 | 11 | Image: flower.jpg 12 | Released under a creative commons license. [1] 13 | Attribution: Some rights reserved by vultilion [2] 14 | Retrieved 21st August, 2011 from [3] by Robert Layton 15 | 16 | [1] https://creativecommons.org/licenses/by/2.0/ 17 | [2] https://www.flickr.com/photos/vultilion/ 18 | [3] https://www.flickr.com/photos/vultilion/6056698931/sizes/z/in/photostream/ 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/linnerud.rst: -------------------------------------------------------------------------------- 1 | .. _linnerrud_dataset: 2 | 3 | Linnerud dataset 4 | ---------------- 5 | 6 | **Data Set Characteristics:** 7 | 8 | :Number of Instances: 20 9 | :Number of Attributes: 3 10 | :Missing Attribute Values: None 11 | 12 | The Linnerud dataset is a multi-output regression dataset. 
It consists of three 13 | exercise (data) and three physiological (target) variables collected from 14 | twenty middle-aged men in a fitness club: 15 | 16 | - *physiological* - CSV containing 20 observations on 3 physiological variables: 17 | Weight, Waist and Pulse. 18 | - *exercise* - CSV containing 20 observations on 3 exercise variables: 19 | Chins, Situps and Jumps. 20 | 21 | .. topic:: References 22 | 23 | * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: 24 | Editions Technip. 25 | -------------------------------------------------------------------------------- /sklearn/experimental/enable_hist_gradient_boosting.py: -------------------------------------------------------------------------------- 1 | """This is now a no-op and can be safely removed from your code. 2 | 3 | It used to enable the use of 4 | :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and 5 | :class:`~sklearn.ensemble.HistGradientBoostingRegressor` when they were still 6 | :term:`experimental`, but these estimators are now stable and can be imported 7 | normally from `sklearn.ensemble`. 8 | """ 9 | # Don't remove this file, we don't want to break users' code just because the 10 | # feature isn't experimental anymore. 11 | 12 | 13 | import warnings 14 | 15 | 16 | warnings.warn( 17 | "Since version 1.0, " 18 | "it is not needed to import enable_hist_gradient_boosting anymore. " 19 | "HistGradientBoostingClassifier and HistGradientBoostingRegressor are now " 20 | "stable and can be normally imported from sklearn.ensemble." 
21 | ) 22 | -------------------------------------------------------------------------------- /sklearn/utils/_joblib.py: -------------------------------------------------------------------------------- 1 | import warnings as _warnings 2 | 3 | with _warnings.catch_warnings(): 4 | _warnings.simplefilter("ignore") 5 | # joblib imports may raise DeprecationWarning on certain Python 6 | # versions 7 | import joblib 8 | from joblib import logger 9 | from joblib import dump, load 10 | from joblib import __version__ 11 | from joblib import effective_n_jobs 12 | from joblib import hash 13 | from joblib import cpu_count, Parallel, Memory, delayed 14 | from joblib import parallel_backend, register_parallel_backend 15 | 16 | 17 | __all__ = [ 18 | "parallel_backend", 19 | "register_parallel_backend", 20 | "cpu_count", 21 | "Parallel", 22 | "Memory", 23 | "delayed", 24 | "effective_n_jobs", 25 | "hash", 26 | "logger", 27 | "dump", 28 | "load", 29 | "joblib", 30 | "__version__", 31 | ] 32 | -------------------------------------------------------------------------------- /.github/workflows/assign.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Assign 3 | on: 4 | issue_comment: 5 | types: created 6 | 7 | jobs: 8 | one: 9 | runs-on: ubuntu-latest 10 | if: >- 11 | (github.event.comment.body == 'take' || 12 | github.event.comment.body == 'Take') 13 | && !github.event.issue.assignee 14 | steps: 15 | - run: | 16 | echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" 17 | curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees 18 | curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X "DELETE" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels/help%20wanted 19 | 
-------------------------------------------------------------------------------- /sklearn/utils/_logistic_sigmoid.pyx: -------------------------------------------------------------------------------- 1 | from libc.math cimport log, exp 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | 6 | np.import_array() 7 | ctypedef np.float64_t DTYPE_t 8 | 9 | 10 | cdef inline DTYPE_t _inner_log_logistic_sigmoid(const DTYPE_t x): 11 | """Log of the logistic sigmoid function log(1 / (1 + e ** -x))""" 12 | if x > 0: 13 | return -log(1. + exp(-x)) 14 | else: 15 | return x - log(1. + exp(x)) 16 | 17 | 18 | def _log_logistic_sigmoid(unsigned int n_samples, 19 | unsigned int n_features, 20 | DTYPE_t[:, :] X, 21 | DTYPE_t[:, :] out): 22 | cdef: 23 | unsigned int i 24 | unsigned int j 25 | 26 | for i in range(n_samples): 27 | for j in range(n_features): 28 | out[i, j] = _inner_log_logistic_sigmoid(X[i, j]) 29 | return out 30 | -------------------------------------------------------------------------------- /sklearn/utils/_typedefs.pyx: -------------------------------------------------------------------------------- 1 | #!python 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | from libc.math cimport sqrt 6 | 7 | np.import_array() 8 | 9 | 10 | # use a hack to determine the associated numpy data types 11 | # NOTE: the following requires the buffer interface, only available in 12 | # numpy 1.5+. We'll choose the DTYPE by hand instead. 
13 | #cdef ITYPE_t idummy 14 | #cdef ITYPE_t[:] idummy_view = &idummy 15 | #ITYPE = np.asarray(idummy_view).dtype 16 | ITYPE = np.intp # WARNING: this should match ITYPE_t in typedefs.pxd 17 | 18 | #cdef DTYPE_t ddummy 19 | #cdef DTYPE_t[:] ddummy_view = &ddummy 20 | #DTYPE = np.asarray(ddummy_view).dtype 21 | DTYPE = np.float64 # WARNING: this should match DTYPE_t in typedefs.pxd 22 | 23 | # some handy constants 24 | cdef DTYPE_t INF = np.inf 25 | cdef DTYPE_t PI = np.pi 26 | cdef DTYPE_t ROOT_2PI = sqrt(2 * PI) 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Suggest a new algorithm, enhancement to an existing algorithm, etc. 3 | labels: ['New Feature'] 4 | 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: > 9 | #### If you want to propose a new algorithm, please refer first to the [scikit-learn inclusion criterion](https://scikit-learn.org/stable/faq.html#what-are-the-inclusion-criteria-for-new-algorithms). 
10 | - type: textarea 11 | attributes: 12 | label: Describe the workflow you want to enable 13 | validations: 14 | required: true 15 | - type: textarea 16 | attributes: 17 | label: Describe your proposed solution 18 | validations: 19 | required: true 20 | - type: textarea 21 | attributes: 22 | label: Describe alternatives you've considered, if relevant 23 | - type: textarea 24 | attributes: 25 | label: Additional context 26 | -------------------------------------------------------------------------------- /sklearn/datasets/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import os 3 | import platform 4 | 5 | 6 | def configuration(parent_package="", top_path=None): 7 | from numpy.distutils.misc_util import Configuration 8 | 9 | config = Configuration("datasets", parent_package, top_path) 10 | config.add_data_dir("data") 11 | config.add_data_dir("descr") 12 | config.add_data_dir("images") 13 | config.add_data_dir(os.path.join("tests", "data")) 14 | if platform.python_implementation() != "PyPy": 15 | config.add_extension( 16 | "_svmlight_format_fast", 17 | sources=["_svmlight_format_fast.pyx"], 18 | include_dirs=[numpy.get_include()], 19 | ) 20 | config.add_subpackage("tests") 21 | return config 22 | 23 | 24 | if __name__ == "__main__": 25 | from numpy.distutils.core import setup 26 | 27 | setup(**configuration(top_path="").todict()) 28 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py: -------------------------------------------------------------------------------- 1 | """Script to download the movie review dataset""" 2 | 3 | import os 4 | import tarfile 5 | from contextlib import closing 6 | from urllib.request import urlopen 7 | 8 | 9 | URL = ("http://www.cs.cornell.edu/people/pabo/" 10 | "movie-review-data/review_polarity.tar.gz") 11 | 12 | ARCHIVE_NAME = URL.rsplit('/', 1)[1] 13 | DATA_FOLDER = 
"txt_sentoken" 14 | 15 | 16 | if not os.path.exists(DATA_FOLDER): 17 | 18 | if not os.path.exists(ARCHIVE_NAME): 19 | print("Downloading dataset from %s (3 MB)" % URL) 20 | opener = urlopen(URL) 21 | with open(ARCHIVE_NAME, 'wb') as archive: 22 | archive.write(opener.read()) 23 | 24 | print("Decompressing %s" % ARCHIVE_NAME) 25 | with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: 26 | archive.extractall(path='.') 27 | os.remove(ARCHIVE_NAME) 28 | -------------------------------------------------------------------------------- /asv_benchmarks/benchmarks/svm.py: -------------------------------------------------------------------------------- 1 | from sklearn.svm import SVC 2 | 3 | from .common import Benchmark, Estimator, Predictor 4 | from .datasets import _synth_classification_dataset 5 | from .utils import make_gen_classif_scorers 6 | 7 | 8 | class SVCBenchmark(Predictor, Estimator, Benchmark): 9 | """Benchmarks for SVC.""" 10 | 11 | param_names = ["kernel"] 12 | params = (["linear", "poly", "rbf", "sigmoid"],) 13 | 14 | def setup_cache(self): 15 | super().setup_cache() 16 | 17 | def make_data(self, params): 18 | return _synth_classification_dataset() 19 | 20 | def make_estimator(self, params): 21 | (kernel,) = params 22 | 23 | estimator = SVC( 24 | max_iter=100, tol=1e-16, kernel=kernel, random_state=0, gamma="scale" 25 | ) 26 | 27 | return estimator 28 | 29 | def make_scorers(self): 30 | make_gen_classif_scorers(self) 31 | -------------------------------------------------------------------------------- /examples/decomposition/plot_beta_divergence.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================== 3 | Beta-divergence loss functions 4 | ============================== 5 | 6 | A plot that compares the various Beta-divergence loss functions supported by 7 | the Multiplicative-Update ('mu') solver in :class:`~sklearn.decomposition.NMF`. 
8 | 9 | """ 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | from sklearn.decomposition._nmf import _beta_divergence 14 | 15 | x = np.linspace(0.001, 4, 1000) 16 | y = np.zeros(x.shape) 17 | 18 | colors = "mbgyr" 19 | for j, beta in enumerate((0.0, 0.5, 1.0, 1.5, 2.0)): 20 | for i, xi in enumerate(x): 21 | y[i] = _beta_divergence(1, xi, 1, beta) 22 | name = "beta = %1.1f" % beta 23 | plt.plot(x, y, label=name, color=colors[j]) 24 | 25 | plt.xlabel("x") 26 | plt.title("beta-divergence(1, x)") 27 | plt.legend(loc=0) 28 | plt.axis([0, 4, 0, 3]) 29 | plt.show() 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Discussions 4 | url: https://github.com/scikit-learn/scikit-learn/discussions/new 5 | about: Ask questions and discuss with other scikit-learn community members 6 | - name: Stack Overflow 7 | url: https://stackoverflow.com/questions/tagged/scikit-learn 8 | about: Please ask and answer usage questions on Stack Overflow 9 | - name: Mailing list 10 | url: https://mail.python.org/mailman/listinfo/scikit-learn 11 | about: General discussions and announcements on the mailing list 12 | - name: Gitter 13 | url: https://gitter.im/scikit-learn/scikit-learn 14 | about: Users and developers can sometimes be found on the gitter channel 15 | - name: Blank issue 16 | url: https://github.com/scikit-learn/scikit-learn/issues/new 17 | about: Please note that Github Discussions should be used in most cases instead 18 | -------------------------------------------------------------------------------- /doc/includes/big_toc_css.rst: -------------------------------------------------------------------------------- 1 | .. 2 | File to ..include in a document with a big table of content, to give 3 | it 'style' 4 | 5 | .. 
raw:: html 6 | 7 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_arrayfuncs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from sklearn.utils._testing import assert_allclose 5 | from sklearn.utils.arrayfuncs import min_pos 6 | 7 | 8 | def test_min_pos(): 9 | # Check that min_pos returns a positive value and that it's consistent 10 | # between float and double 11 | X = np.random.RandomState(0).randn(100) 12 | 13 | min_double = min_pos(X) 14 | min_float = min_pos(X.astype(np.float32)) 15 | 16 | assert_allclose(min_double, min_float) 17 | assert min_double >= 0 18 | 19 | 20 | @pytest.mark.parametrize("dtype", [np.float32, np.float64]) 21 | def test_min_pos_no_positive(dtype): 22 | # Check that the return value of min_pos is the maximum representable 23 | # value of the input dtype when all input elements are <= 0 (#19328) 24 | X = np.full(100, -1.0).astype(dtype, copy=False) 25 | 26 | assert min_pos(X) == np.finfo(dtype).max 27 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_optimize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils.optimize import _newton_cg 4 | from scipy.optimize import fmin_ncg 5 | 6 | from sklearn.utils._testing import assert_array_almost_equal 7 | 8 | 9 | def test_newton_cg(): 10 | # Test that newton_cg gives same result as scipy's fmin_ncg 11 | 12 | rng = np.random.RandomState(0) 13 | A = rng.normal(size=(10, 10)) 14 | x0 = np.ones(10) 15 | 16 | def func(x): 17 | Ax = A.dot(x) 18 | return 0.5 * (Ax).dot(Ax) 19 | 20 | def grad(x): 21 | return A.T.dot(A.dot(x)) 22 | 23 | def hess(x, p): 24 | return p.dot(A.T.dot(A.dot(x.all()))) 25 | 26 | def grad_hess(x): 27 | return grad(x), lambda x: A.T.dot(A.dot(x)) 28 | 29 | 
assert_array_almost_equal( 30 | _newton_cg(grad_hess, func, grad, x0, tol=1e-10)[0], 31 | fmin_ncg(f=func, x0=x0, fprime=grad, fhess_p=hess), 32 | ) 33 | -------------------------------------------------------------------------------- /sklearn/linear_model/_sgd_fast.pxd: -------------------------------------------------------------------------------- 1 | # License: BSD 3 clause 2 | """Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx""" 3 | 4 | cdef class LossFunction: 5 | cdef double loss(self, double p, double y) nogil 6 | cdef double dloss(self, double p, double y) nogil 7 | 8 | 9 | cdef class Regression(LossFunction): 10 | cdef double loss(self, double p, double y) nogil 11 | cdef double dloss(self, double p, double y) nogil 12 | 13 | 14 | cdef class Classification(LossFunction): 15 | cdef double loss(self, double p, double y) nogil 16 | cdef double dloss(self, double p, double y) nogil 17 | 18 | 19 | cdef class Log(Classification): 20 | cdef double loss(self, double p, double y) nogil 21 | cdef double dloss(self, double p, double y) nogil 22 | 23 | 24 | cdef class SquaredLoss(Regression): 25 | cdef double loss(self, double p, double y) nogil 26 | cdef double dloss(self, double p, double y) nogil 27 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_cython_templating.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | import sklearn 4 | 5 | 6 | def test_files_generated_by_templates_are_git_ignored(): 7 | """Check the consistence of the files generated from template files.""" 8 | gitignore_file = pathlib.Path(sklearn.__file__).parent.parent / ".gitignore" 9 | if not gitignore_file.exists(): 10 | pytest.skip("Tests are not run from the source folder") 11 | 12 | base_dir = pathlib.Path(sklearn.__file__).parent 13 | ignored_files = gitignore_file.read_text().split("\n") 14 | ignored_files = [pathlib.Path(line) for line in 
ignored_files] 15 | 16 | for filename in base_dir.glob("**/*.tp"): 17 | filename = filename.relative_to(base_dir.parent) 18 | # From "path/to/template.p??.tp" to "path/to/template.p??" 19 | filename_wo_tempita_suffix = filename.with_suffix("") 20 | assert filename_wo_tempita_suffix in ignored_files 21 | -------------------------------------------------------------------------------- /sklearn/svm/src/liblinear/tron.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRON_H 2 | #define _TRON_H 3 | 4 | #include "_cython_blas_helpers.h" 5 | 6 | class function 7 | { 8 | public: 9 | virtual double fun(double *w) = 0 ; 10 | virtual void grad(double *w, double *g) = 0 ; 11 | virtual void Hv(double *s, double *Hs) = 0 ; 12 | 13 | virtual int get_nr_variable(void) = 0 ; 14 | virtual ~function(void){} 15 | }; 16 | 17 | class TRON 18 | { 19 | public: 20 | TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000, BlasFunctions *blas = 0); 21 | ~TRON(); 22 | 23 | int tron(double *w); 24 | void set_print_string(void (*i_print) (const char *buf)); 25 | 26 | private: 27 | int trcg(double delta, double *g, double *s, double *r); 28 | double norm_inf(int n, double *x); 29 | 30 | double eps; 31 | int max_iter; 32 | function *fun_obj; 33 | BlasFunctions *blas; 34 | void info(const char *fmt,...); 35 | void (*tron_print_string)(const char *buf); 36 | }; 37 | #endif 38 | -------------------------------------------------------------------------------- /sklearn/utils/murmurhash.pxd: -------------------------------------------------------------------------------- 1 | """Export fast murmurhash C/C++ routines + cython wrappers""" 2 | 3 | cimport numpy as np 4 | 5 | # The C API is disabled for now, since it requires -I flags to get 6 | # compilation to work even when these functions are not used. 
7 | #cdef extern from "MurmurHash3.h": 8 | # void MurmurHash3_x86_32(void* key, int len, unsigned int seed, 9 | # void* out) 10 | # 11 | # void MurmurHash3_x86_128(void* key, int len, unsigned int seed, 12 | # void* out) 13 | # 14 | # void MurmurHash3_x64_128(void* key, int len, unsigned int seed, 15 | # void* out) 16 | 17 | 18 | cpdef np.uint32_t murmurhash3_int_u32(int key, unsigned int seed) 19 | cpdef np.int32_t murmurhash3_int_s32(int key, unsigned int seed) 20 | cpdef np.uint32_t murmurhash3_bytes_u32(bytes key, unsigned int seed) 21 | cpdef np.int32_t murmurhash3_bytes_s32(bytes key, unsigned int seed) 22 | -------------------------------------------------------------------------------- /sklearn/decomposition/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy 3 | from numpy.distutils.misc_util import Configuration 4 | 5 | 6 | def configuration(parent_package="", top_path=None): 7 | config = Configuration("decomposition", parent_package, top_path) 8 | 9 | libraries = [] 10 | if os.name == "posix": 11 | libraries.append("m") 12 | 13 | config.add_extension( 14 | "_online_lda_fast", 15 | sources=["_online_lda_fast.pyx"], 16 | include_dirs=[numpy.get_include()], 17 | libraries=libraries, 18 | ) 19 | 20 | config.add_extension( 21 | "_cdnmf_fast", 22 | sources=["_cdnmf_fast.pyx"], 23 | include_dirs=[numpy.get_include()], 24 | libraries=libraries, 25 | ) 26 | 27 | config.add_subpackage("tests") 28 | 29 | return config 30 | 31 | 32 | if __name__ == "__main__": 33 | from numpy.distutils.core import setup 34 | 35 | setup(**configuration().todict()) 36 | -------------------------------------------------------------------------------- /doc/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | .. Places global toc into the sidebar 2 | 3 | :globalsidebartoc: True 4 | 5 | .. _tutorial_menu: 6 | 7 | 8 | .. include:: ../includes/big_toc_css.rst 9 | .. 
include:: ../tune_toc.rst 10 | 11 | ====================== 12 | scikit-learn Tutorials 13 | ====================== 14 | 15 | | 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | 20 | basic/tutorial.rst 21 | statistical_inference/index.rst 22 | text_analytics/working_with_text_data.rst 23 | machine_learning_map/index 24 | ../presentations 25 | 26 | | 27 | 28 | .. note:: **Doctest Mode** 29 | 30 | The code-examples in the above tutorials are written in a 31 | *python-console* format. If you wish to easily execute these examples 32 | in **IPython**, use:: 33 | 34 | %doctest_mode 35 | 36 | in the IPython-console. You can then simply copy and paste the examples 37 | directly into IPython without having to worry about removing the **>>>** 38 | manually. 39 | -------------------------------------------------------------------------------- /.github/workflows/twitter.yml: -------------------------------------------------------------------------------- 1 | # Tweet the URL of a commit on @sklearn_commits whenever a push event 2 | # happens on the main branch 3 | name: Twitter Push Notification 4 | 5 | 6 | on: 7 | push: 8 | branches: 9 | - main 10 | 11 | 12 | jobs: 13 | tweet: 14 | name: Twitter Notification 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Tweet URL of last commit as @sklearn_commits 18 | if: github.repository == 'scikit-learn/scikit-learn' 19 | uses: docker://thomasjpfan/twitter-action:0.3 20 | with: 21 | args: "-message \"https://github.com/scikit-learn/scikit-learn/commit/${{ github.sha }}\"" 22 | env: 23 | TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} 24 | TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} 25 | TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} 26 | TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }} 27 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_fast_dict.py: 
-------------------------------------------------------------------------------- 1 | """ Test fast_dict. 2 | """ 3 | import numpy as np 4 | 5 | from sklearn.utils._fast_dict import IntFloatDict, argmin 6 | 7 | 8 | def test_int_float_dict(): 9 | rng = np.random.RandomState(0) 10 | keys = np.unique(rng.randint(100, size=10).astype(np.intp)) 11 | values = rng.rand(len(keys)) 12 | 13 | d = IntFloatDict(keys, values) 14 | for key, value in zip(keys, values): 15 | assert d[key] == value 16 | assert len(d) == len(keys) 17 | 18 | d.append(120, 3.0) 19 | assert d[120] == 3.0 20 | assert len(d) == len(keys) + 1 21 | for i in range(2000): 22 | d.append(i + 1000, 4.0) 23 | assert d[1100] == 4.0 24 | 25 | 26 | def test_int_float_dict_argmin(): 27 | # Test the argmin implementation on the IntFloatDict 28 | keys = np.arange(100, dtype=np.intp) 29 | values = np.arange(100, dtype=np.float64) 30 | d = IntFloatDict(keys, values) 31 | assert argmin(d) == (0, 0) 32 | -------------------------------------------------------------------------------- /asv_benchmarks/benchmarks/manifold.py: -------------------------------------------------------------------------------- 1 | from sklearn.manifold import TSNE 2 | 3 | from .common import Benchmark, Estimator 4 | from .datasets import _digits_dataset 5 | 6 | 7 | class TSNEBenchmark(Estimator, Benchmark): 8 | """ 9 | Benchmarks for t-SNE. 
10 | """ 11 | 12 | param_names = ["method"] 13 | params = (["exact", "barnes_hut"],) 14 | 15 | def setup_cache(self): 16 | super().setup_cache() 17 | 18 | def make_data(self, params): 19 | (method,) = params 20 | 21 | n_samples = 500 if method == "exact" else None 22 | 23 | return _digits_dataset(n_samples=n_samples) 24 | 25 | def make_estimator(self, params): 26 | (method,) = params 27 | 28 | estimator = TSNE(random_state=0, method=method) 29 | 30 | return estimator 31 | 32 | def make_scorers(self): 33 | self.train_scorer = lambda _, __: self.estimator.kl_divergence_ 34 | self.test_scorer = lambda _, __: self.estimator.kl_divergence_ 35 | -------------------------------------------------------------------------------- /sklearn/neural_network/tests/test_base.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from sklearn.neural_network._base import binary_log_loss 5 | from sklearn.neural_network._base import log_loss 6 | 7 | 8 | def test_binary_log_loss_1_prob_finite(): 9 | # y_proba is equal to one should result in a finite logloss 10 | y_true = np.array([[0, 0, 1]]).T 11 | y_prob = np.array([[0.9, 1.0, 1.0]]).T 12 | 13 | loss = binary_log_loss(y_true, y_prob) 14 | assert np.isfinite(loss) 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "y_true, y_prob", 19 | [ 20 | ( 21 | np.array([[1, 0, 0], [0, 1, 0]]), 22 | np.array([[0.0, 1.0, 0.0], [0.9, 0.05, 0.05]]), 23 | ), 24 | (np.array([[0, 0, 1]]).T, np.array([[0.9, 1.0, 1.0]]).T), 25 | ], 26 | ) 27 | def test_log_loss_1_prob_finite(y_true, y_prob): 28 | # y_proba is equal to 1 should result in a finite logloss 29 | loss = log_loss(y_true, y_prob) 30 | assert np.isfinite(loss) 31 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/data/twenty_newsgroups/fetch_data.py: -------------------------------------------------------------------------------- 1 | """Script to download the 20 
newsgroups text classification set""" 2 | 3 | import os 4 | import tarfile 5 | from contextlib import closing 6 | from urllib.request import urlopen 7 | 8 | URL = ("http://people.csail.mit.edu/jrennie/" 9 | "20Newsgroups/20news-bydate.tar.gz") 10 | 11 | ARCHIVE_NAME = URL.rsplit('/', 1)[1] 12 | TRAIN_FOLDER = "20news-bydate-train" 13 | TEST_FOLDER = "20news-bydate-test" 14 | 15 | 16 | if not os.path.exists(TRAIN_FOLDER) or not os.path.exists(TEST_FOLDER): 17 | 18 | if not os.path.exists(ARCHIVE_NAME): 19 | print("Downloading dataset from %s (14 MB)" % URL) 20 | opener = urlopen(URL) 21 | with open(ARCHIVE_NAME, 'wb') as archive: 22 | archive.write(opener.read()) 23 | 24 | print("Decompressing %s" % ARCHIVE_NAME) 25 | with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: 26 | archive.extractall(path='.') 27 | os.remove(ARCHIVE_NAME) 28 | -------------------------------------------------------------------------------- /examples/model_selection/plot_cv_predict.py: -------------------------------------------------------------------------------- 1 | """ 2 | ==================================== 3 | Plotting Cross-Validated Predictions 4 | ==================================== 5 | 6 | This example shows how to use 7 | :func:`~sklearn.model_selection.cross_val_predict` to visualize prediction 8 | errors. 
9 | 10 | """ 11 | 12 | from sklearn import datasets 13 | from sklearn.model_selection import cross_val_predict 14 | from sklearn import linear_model 15 | import matplotlib.pyplot as plt 16 | 17 | lr = linear_model.LinearRegression() 18 | X, y = datasets.load_diabetes(return_X_y=True) 19 | 20 | # cross_val_predict returns an array of the same size as `y` where each entry 21 | # is a prediction obtained by cross validation: 22 | predicted = cross_val_predict(lr, X, y, cv=10) 23 | 24 | fig, ax = plt.subplots() 25 | ax.scatter(y, predicted, edgecolors=(0, 0, 0)) 26 | ax.plot([y.min(), y.max()], [y.min(), y.max()], "k--", lw=4) 27 | ax.set_xlabel("Measured") 28 | ax.set_ylabel("Predicted") 29 | plt.show() 30 | -------------------------------------------------------------------------------- /benchmarks/plot_tsne_mnist.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import os.path as op 4 | 5 | import argparse 6 | 7 | 8 | LOG_DIR = "mnist_tsne_output" 9 | 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser("Plot benchmark results for t-SNE") 13 | parser.add_argument( 14 | "--labels", 15 | type=str, 16 | default=op.join(LOG_DIR, "mnist_original_labels_10000.npy"), 17 | help="1D integer numpy array for labels", 18 | ) 19 | parser.add_argument( 20 | "--embedding", 21 | type=str, 22 | default=op.join(LOG_DIR, "mnist_sklearn_TSNE_10000.npy"), 23 | help="2D float numpy array for embedded data", 24 | ) 25 | args = parser.parse_args() 26 | 27 | X = np.load(args.embedding) 28 | y = np.load(args.labels) 29 | 30 | for i in np.unique(y): 31 | mask = y == i 32 | plt.scatter(X[mask, 0], X[mask, 1], alpha=0.2, label=int(i)) 33 | plt.legend(loc="best") 34 | plt.show() 35 | -------------------------------------------------------------------------------- /sklearn/manifold/setup.py: -------------------------------------------------------------------------------- 1 | 
import os 2 | 3 | import numpy 4 | 5 | 6 | def configuration(parent_package="", top_path=None): 7 | from numpy.distutils.misc_util import Configuration 8 | 9 | config = Configuration("manifold", parent_package, top_path) 10 | 11 | libraries = [] 12 | if os.name == "posix": 13 | libraries.append("m") 14 | 15 | config.add_extension( 16 | "_utils", 17 | sources=["_utils.pyx"], 18 | include_dirs=[numpy.get_include()], 19 | libraries=libraries, 20 | extra_compile_args=["-O3"], 21 | ) 22 | 23 | config.add_extension( 24 | "_barnes_hut_tsne", 25 | sources=["_barnes_hut_tsne.pyx"], 26 | include_dirs=[numpy.get_include()], 27 | libraries=libraries, 28 | extra_compile_args=["-O3"], 29 | ) 30 | 31 | config.add_subpackage("tests") 32 | 33 | return config 34 | 35 | 36 | if __name__ == "__main__": 37 | from numpy.distutils.core import setup 38 | 39 | setup(**configuration().todict()) 40 | -------------------------------------------------------------------------------- /sklearn/metrics/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from numpy.distutils.misc_util import Configuration 5 | 6 | 7 | def configuration(parent_package="", top_path=None): 8 | config = Configuration("metrics", parent_package, top_path) 9 | 10 | libraries = [] 11 | if os.name == "posix": 12 | libraries.append("m") 13 | 14 | config.add_subpackage("_plot") 15 | config.add_subpackage("_plot.tests") 16 | config.add_subpackage("cluster") 17 | 18 | config.add_extension( 19 | "_pairwise_fast", sources=["_pairwise_fast.pyx"], libraries=libraries 20 | ) 21 | 22 | config.add_extension( 23 | "_dist_metrics", 24 | sources=["_dist_metrics.pyx"], 25 | include_dirs=[np.get_include(), os.path.join(np.get_include(), "numpy")], 26 | libraries=libraries, 27 | ) 28 | 29 | config.add_subpackage("tests") 30 | 31 | return config 32 | 33 | 34 | if __name__ == "__main__": 35 | from numpy.distutils.core import setup 36 | 37 | 
setup(**configuration().todict()) 38 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | # Commits pushed to main should not make the overall 8 | # project coverage decrease by more than 1%: 9 | target: auto 10 | threshold: 1% 11 | patch: 12 | default: 13 | # Be tolerant on slight code coverage diff on PRs to limit 14 | # noisy red coverage status on github PRs. 15 | # Note: The coverage stats are still uploaded 16 | # to codecov so that PR reviewers can see uncovered lines 17 | target: auto 18 | threshold: 1% 19 | 20 | codecov: 21 | notify: 22 | # Prevent coverage status to upload multiple times for parallel and long 23 | # running CI pipelines. This configuration is particularly useful on PRs 24 | # to avoid confusion. Note that this value is set to the number of Azure 25 | # Pipeline jobs uploading coverage reports. 26 | after_n_builds: 6 27 | 28 | ignore: 29 | - "sklearn/externals" 30 | - "sklearn/_build_utils" 31 | - "**/setup.py" 32 | -------------------------------------------------------------------------------- /sklearn/cluster/tests/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common utilities for testing clustering. 
3 | 4 | """ 5 | 6 | import numpy as np 7 | 8 | 9 | ############################################################################### 10 | # Generate sample data 11 | 12 | 13 | def generate_clustered_data( 14 | seed=0, n_clusters=3, n_features=2, n_samples_per_cluster=20, std=0.4 15 | ): 16 | prng = np.random.RandomState(seed) 17 | 18 | # the data is voluntarily shifted away from zero to check clustering 19 | # algorithm robustness with regard to non-centered data 20 | means = ( 21 | np.array( 22 | [ 23 | [1, 1, 1, 0], 24 | [-1, -1, 0, 1], 25 | [1, -1, 1, 1], 26 | [-1, 1, 1, 0], 27 | ] 28 | ) 29 | + 10 30 | ) 31 | 32 | X = np.empty((0, n_features)) 33 | for i in range(n_clusters): 34 | X = np.r_[ 35 | X, 36 | means[i][:n_features] + std * prng.randn(n_samples_per_cluster, n_features), 37 | ] 38 | return X 39 | -------------------------------------------------------------------------------- /examples/datasets/plot_digits_last_image.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ========================================================= 4 | The Digit Dataset 5 | ========================================================= 6 | 7 | This dataset is made up of 1797 8x8 images. Each image, 8 | like the one shown below, is of a hand-written digit. 9 | In order to utilize an 8x8 figure like this, we'd have to 10 | first transform it into a feature vector with length 64. 11 | 12 | See `here 13 | `_ 14 | for more information about this dataset.
15 | 16 | """ 17 | 18 | # Code source: Gaël Varoquaux 19 | # Modified for documentation by Jaques Grobler 20 | # License: BSD 3 clause 21 | 22 | from sklearn import datasets 23 | 24 | import matplotlib.pyplot as plt 25 | 26 | # Load the digits dataset 27 | digits = datasets.load_digits() 28 | 29 | # Display the last digit 30 | plt.figure(1, figsize=(3, 3)) 31 | plt.imshow(digits.images[-1], cmap=plt.cm.gray_r, interpolation="nearest") 32 | plt.show() 33 | -------------------------------------------------------------------------------- /doc/whats_new.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: sklearn 2 | .. include:: whats_new/_contributors.rst 3 | 4 | Release History 5 | =============== 6 | 7 | Release notes for all scikit-learn releases are linked in this page. 8 | 9 | **Tip:** `Subscribe to scikit-learn releases `__ 10 | on libraries.io to be notified when new versions are released. 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | 15 | Version 1.1 16 | Version 1.0 17 | Version 0.24 18 | Version 0.23 19 | Version 0.22 20 | Version 0.21 21 | Version 0.20 22 | Version 0.19 23 | Version 0.18 24 | Version 0.17 25 | Version 0.16 26 | Version 0.15 27 | Version 0.14 28 | Version 0.13 29 | Older Versions 30 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_olivetti_faces.py: -------------------------------------------------------------------------------- 1 | """Test Olivetti faces fetcher, if the data is available, 2 | or if specifically requested via environment variable 3 | (e.g.
for travis cron job).""" 4 | 5 | import numpy as np 6 | 7 | from sklearn.utils import Bunch 8 | from sklearn.datasets.tests.test_common import check_return_X_y 9 | 10 | from sklearn.utils._testing import assert_array_equal 11 | 12 | 13 | def test_olivetti_faces(fetch_olivetti_faces_fxt): 14 | data = fetch_olivetti_faces_fxt(shuffle=True, random_state=0) 15 | 16 | assert isinstance(data, Bunch) 17 | for expected_keys in ("data", "images", "target", "DESCR"): 18 | assert expected_keys in data.keys() 19 | 20 | assert data.data.shape == (400, 4096) 21 | assert data.images.shape == (400, 64, 64) 22 | assert data.target.shape == (400,) 23 | assert_array_equal(np.unique(np.sort(data.target)), np.arange(40)) 24 | assert data.DESCR.startswith(".. _olivetti_faces_dataset:") 25 | 26 | # test the return_X_y option 27 | check_return_X_y(data, fetch_olivetti_faces_fxt) 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | recursive-include doc * 3 | recursive-include examples * 4 | recursive-include sklearn *.c *.h *.pyx *.pxd *.pxi *.tp 5 | recursive-include sklearn/datasets *.csv *.csv.gz *.rst *.jpg *.txt *.arff.gz *.json.gz 6 | include COPYING 7 | include README.rst 8 | include pyproject.toml 9 | include sklearn/externals/README 10 | include sklearn/svm/src/liblinear/COPYRIGHT 11 | include sklearn/svm/src/libsvm/LIBSVM_CHANGES 12 | include conftest.py 13 | include Makefile 14 | include MANIFEST.in 15 | include .coveragerc 16 | 17 | # exclude from sdist 18 | recursive-exclude asv_benchmarks * 19 | recursive-exclude benchmarks * 20 | recursive-exclude build_tools * 21 | recursive-exclude maint_tools * 22 | recursive-exclude benchmarks * 23 | recursive-exclude .binder * 24 | recursive-exclude .circleci * 25 | exclude .codecov.yml 26 | exclude .git-blame-ignore-revs 27 | exclude .mailmap 28 | exclude .pre-commit-config.yaml 29 | 
exclude azure-pipelines.yml 30 | exclude lgtm.yml 31 | exclude CODE_OF_CONDUCT.md 32 | exclude CONTRIBUTING.md 33 | exclude PULL_REQUEST_TEMPLATE.md 34 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_parallel.py: -------------------------------------------------------------------------------- 1 | from distutils.version import LooseVersion 2 | 3 | import pytest 4 | from joblib import Parallel 5 | import joblib 6 | 7 | from numpy.testing import assert_array_equal 8 | 9 | from sklearn._config import config_context, get_config 10 | from sklearn.utils.fixes import delayed 11 | 12 | 13 | def get_working_memory(): 14 | return get_config()["working_memory"] 15 | 16 | 17 | @pytest.mark.parametrize("n_jobs", [1, 2]) 18 | @pytest.mark.parametrize("backend", ["loky", "threading", "multiprocessing"]) 19 | def test_configuration_passes_through_to_joblib(n_jobs, backend): 20 | # Tests that the global configuration is passed to joblib jobs 21 | 22 | if joblib.__version__ < LooseVersion("0.12") and backend == "loky": 23 | pytest.skip("loky backend does not exist in joblib <0.12") 24 | 25 | with config_context(working_memory=123): 26 | results = Parallel(n_jobs=n_jobs, backend=backend)( 27 | delayed(get_working_memory)() for _ in range(2) 28 | ) 29 | 30 | assert_array_equal(results, [123] * 2) 31 | -------------------------------------------------------------------------------- /examples/feature_selection/plot_rfe_digits.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================= 3 | Recursive feature elimination 4 | ============================= 5 | 6 | A recursive feature elimination example showing the relevance of pixels in 7 | a digit classification task. 8 | 9 | ..
note:: 10 | 11 | See also :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py` 12 | 13 | """ # noqa: E501 14 | 15 | from sklearn.svm import SVC 16 | from sklearn.datasets import load_digits 17 | from sklearn.feature_selection import RFE 18 | import matplotlib.pyplot as plt 19 | 20 | # Load the digits dataset 21 | digits = load_digits() 22 | X = digits.images.reshape((len(digits.images), -1)) 23 | y = digits.target 24 | 25 | # Create the RFE object and rank each pixel 26 | svc = SVC(kernel="linear", C=1) 27 | rfe = RFE(estimator=svc, n_features_to_select=1, step=1) 28 | rfe.fit(X, y) 29 | ranking = rfe.ranking_.reshape(digits.images[0].shape) 30 | 31 | # Plot pixel ranking 32 | plt.matshow(ranking, cmap=plt.cm.Blues) 33 | plt.colorbar() 34 | plt.title("Ranking of pixels with RFE") 35 | plt.show() 36 | -------------------------------------------------------------------------------- /doc/datasets/real_world.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. _real_world_datasets: 6 | 7 | Real world datasets 8 | =================== 9 | 10 | .. currentmodule:: sklearn.datasets 11 | 12 | scikit-learn provides tools to load larger datasets, downloading them if 13 | necessary. 14 | 15 | They can be loaded using the following functions: 16 | 17 | .. autosummary:: 18 | 19 | fetch_olivetti_faces 20 | fetch_20newsgroups 21 | fetch_20newsgroups_vectorized 22 | fetch_lfw_people 23 | fetch_lfw_pairs 24 | fetch_covtype 25 | fetch_rcv1 26 | fetch_kddcup99 27 | fetch_california_housing 28 | 29 | .. include:: ../../sklearn/datasets/descr/olivetti_faces.rst 30 | 31 | .. include:: ../../sklearn/datasets/descr/twenty_newsgroups.rst 32 | 33 | .. include:: ../../sklearn/datasets/descr/lfw.rst 34 | 35 | .. include:: ../../sklearn/datasets/descr/covtype.rst 36 | 37 | .. include:: ../../sklearn/datasets/descr/rcv1.rst 38 | 39 | .. 
include:: ../../sklearn/datasets/descr/kddcup99.rst 40 | 41 | .. include:: ../../sklearn/datasets/descr/california_housing.rst 42 | -------------------------------------------------------------------------------- /examples/exercises/plot_digits_classification_exercise.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================ 3 | Digits Classification Exercise 4 | ================================ 5 | 6 | A tutorial exercise regarding the use of classification techniques on 7 | the Digits dataset. 8 | 9 | This exercise is used in the :ref:`clf_tut` part of the 10 | :ref:`supervised_learning_tut` section of the 11 | :ref:`stat_learn_tut_index`. 12 | 13 | """ 14 | 15 | from sklearn import datasets, neighbors, linear_model 16 | 17 | X_digits, y_digits = datasets.load_digits(return_X_y=True) 18 | X_digits = X_digits / X_digits.max() 19 | 20 | n_samples = len(X_digits) 21 | 22 | X_train = X_digits[: int(0.9 * n_samples)] 23 | y_train = y_digits[: int(0.9 * n_samples)] 24 | X_test = X_digits[int(0.9 * n_samples) :] 25 | y_test = y_digits[int(0.9 * n_samples) :] 26 | 27 | knn = neighbors.KNeighborsClassifier() 28 | logistic = linear_model.LogisticRegression(max_iter=1000) 29 | 30 | print("KNN score: %f" % knn.fit(X_train, y_train).score(X_test, y_test)) 31 | print( 32 | "LogisticRegression score: %f" 33 | % logistic.fit(X_train, y_train).score(X_test, y_test) 34 | ) 35 | -------------------------------------------------------------------------------- /sklearn/mixture/tests/test_mixture.py: -------------------------------------------------------------------------------- 1 | # Author: Guillaume Lemaitre 2 | # License: BSD 3 clause 3 | 4 | import pytest 5 | import numpy as np 6 | 7 | from sklearn.mixture import GaussianMixture 8 | from sklearn.mixture import BayesianGaussianMixture 9 | 10 | 11 | @pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()]) 12 | def 
test_gaussian_mixture_n_iter(estimator): 13 | # check that n_iter is the number of iterations performed. 14 | rng = np.random.RandomState(0) 15 | X = rng.rand(10, 5) 16 | max_iter = 1 17 | estimator.set_params(max_iter=max_iter) 18 | estimator.fit(X) 19 | assert estimator.n_iter_ == max_iter 20 | 21 | 22 | @pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()]) 23 | def test_mixture_n_components_greater_than_n_samples_error(estimator): 24 | """Check error when n_components > n_samples""" 25 | rng = np.random.RandomState(0) 26 | X = rng.rand(10, 5) 27 | estimator.set_params(n_components=12) 28 | 29 | msg = "Expected n_samples >= n_components" 30 | with pytest.raises(ValueError, match=msg): 31 | estimator.fit(X) 32 | -------------------------------------------------------------------------------- /examples/miscellaneous/plot_changed_only_pprint_parameter.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================= 3 | Compact estimator representations 4 | ================================= 5 | 6 | This example illustrates the use of the print_changed_only global parameter. 7 | 8 | Setting print_changed_only to True will alter the representation of 9 | estimators to only show the parameters that have been set to non-default 10 | values. This can be used to have more compact representations. 
11 | 12 | """ 13 | 14 | from sklearn.linear_model import LogisticRegression 15 | from sklearn import set_config 16 | 17 | 18 | lr = LogisticRegression(penalty="l1") 19 | print("Default representation:") 20 | print(lr) 21 | # LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, 22 | # intercept_scaling=1, l1_ratio=None, max_iter=100, 23 | # multi_class='auto', n_jobs=None, penalty='l1', 24 | # random_state=None, solver='warn', tol=0.0001, verbose=0, 25 | # warm_start=False) 26 | 27 | set_config(print_changed_only=True) 28 | print("\nWith changed_only option:") 29 | print(lr) 30 | # LogisticRegression(penalty='l1') 31 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/solutions/generate_skeletons.py: -------------------------------------------------------------------------------- 1 | """Generate skeletons from the example code""" 2 | import os 3 | 4 | exercise_dir = os.path.dirname(__file__) 5 | if exercise_dir == '': 6 | exercise_dir = '.' 
7 | 8 | skeleton_dir = os.path.abspath(os.path.join(exercise_dir, '..', 'skeletons')) 9 | if not os.path.exists(skeleton_dir): 10 | os.makedirs(skeleton_dir) 11 | 12 | solutions = os.listdir(exercise_dir) 13 | 14 | for f in solutions: 15 | if not f.endswith('.py'): 16 | continue 17 | 18 | if f == os.path.basename(__file__): 19 | continue 20 | 21 | print("Generating skeleton for %s" % f) 22 | 23 | input_file = open(os.path.join(exercise_dir, f)) 24 | output_file = open(os.path.join(skeleton_dir, f), 'w') 25 | 26 | in_exercise_region = False 27 | 28 | for line in input_file: 29 | linestrip = line.strip() 30 | if len(linestrip) == 0: 31 | in_exercise_region = False 32 | elif linestrip.startswith('# TASK:'): 33 | in_exercise_region = True 34 | 35 | if not in_exercise_region or linestrip.startswith('#'): 36 | output_file.write(line) 37 | 38 | output_file.close() 39 | -------------------------------------------------------------------------------- /examples/linear_model/plot_lasso_lars.py: -------------------------------------------------------------------------------- 1 | """ 2 | ===================== 3 | Lasso path using LARS 4 | ===================== 5 | 6 | Computes Lasso Path along the regularization parameter using the LARS 7 | algorithm on the diabetes dataset. Each color represents a different 8 | feature of the coefficient vector, and this is displayed as a function 9 | of the regularization parameter. 
10 | 11 | """ 12 | 13 | # Author: Fabian Pedregosa 14 | # Alexandre Gramfort 15 | # License: BSD 3 clause 16 | 17 | import numpy as np 18 | import matplotlib.pyplot as plt 19 | 20 | from sklearn import linear_model 21 | from sklearn import datasets 22 | 23 | X, y = datasets.load_diabetes(return_X_y=True) 24 | 25 | print("Computing regularization path using the LARS ...") 26 | _, _, coefs = linear_model.lars_path(X, y, method="lasso", verbose=True) 27 | 28 | xx = np.sum(np.abs(coefs.T), axis=1) 29 | xx /= xx[-1] 30 | 31 | plt.plot(xx, coefs.T) 32 | ymin, ymax = plt.ylim() 33 | plt.vlines(xx, ymin, ymax, linestyle="dashed") 34 | plt.xlabel("|coef| / max|coef|") 35 | plt.ylabel("Coefficients") 36 | plt.title("LASSO Path") 37 | plt.axis("tight") 38 | plt.show() 39 | -------------------------------------------------------------------------------- /doc/inspection.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. include:: includes/big_toc_css.rst 6 | 7 | .. _inspection: 8 | 9 | Inspection 10 | ---------- 11 | 12 | Predictive performance is often the main goal of developing machine learning 13 | models. Yet summarising performance with an evaluation metric is often 14 | insufficient: it assumes that the evaluation metric and test dataset 15 | perfectly reflect the target domain, which is rarely true. In certain domains, 16 | a model needs a certain level of interpretability before it can be deployed. 17 | A model that is exhibiting performance issues needs to be debugged for one to 18 | understand the model's underlying issue. The 19 | :mod:`sklearn.inspection` module provides tools to help understand the 20 | predictions from a model and what affects them. This can be used to 21 | evaluate assumptions and biases of a model, design a better model, or 22 | to diagnose issues with model performance. 23 | 24 | .. 
topic:: Examples: 25 | 26 | * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` 27 | 28 | .. toctree:: 29 | 30 | modules/partial_dependence 31 | modules/permutation_importance 32 | -------------------------------------------------------------------------------- /sklearn/neighbors/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def configuration(parent_package="", top_path=None): 5 | import numpy 6 | from numpy.distutils.misc_util import Configuration 7 | 8 | config = Configuration("neighbors", parent_package, top_path) 9 | libraries = [] 10 | if os.name == "posix": 11 | libraries.append("m") 12 | 13 | config.add_extension( 14 | "_ball_tree", 15 | sources=["_ball_tree.pyx"], 16 | include_dirs=[numpy.get_include()], 17 | libraries=libraries, 18 | ) 19 | 20 | config.add_extension( 21 | "_kd_tree", 22 | sources=["_kd_tree.pyx"], 23 | include_dirs=[numpy.get_include()], 24 | libraries=libraries, 25 | ) 26 | 27 | config.add_extension( 28 | "_partition_nodes", 29 | sources=["_partition_nodes.pyx"], 30 | include_dirs=[numpy.get_include()], 31 | language="c++", 32 | libraries=libraries, 33 | ) 34 | 35 | config.add_extension( 36 | "_quad_tree", 37 | sources=["_quad_tree.pyx"], 38 | include_dirs=[numpy.get_include()], 39 | libraries=libraries, 40 | ) 41 | 42 | config.add_subpackage("tests") 43 | 44 | return config 45 | -------------------------------------------------------------------------------- /doc/datasets/toy_dataset.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. _toy_datasets: 6 | 7 | Toy datasets 8 | ============ 9 | 10 | .. currentmodule:: sklearn.datasets 11 | 12 | scikit-learn comes with a few small standard datasets that do not require to 13 | download any file from some external website. 
14 | 15 | They can be loaded using the following functions: 16 | 17 | .. autosummary:: 18 | 19 | load_boston 20 | load_iris 21 | load_diabetes 22 | load_digits 23 | load_linnerud 24 | load_wine 25 | load_breast_cancer 26 | 27 | These datasets are useful to quickly illustrate the behavior of the 28 | various algorithms implemented in scikit-learn. They are however often too 29 | small to be representative of real world machine learning tasks. 30 | 31 | .. include:: ../../sklearn/datasets/descr/boston_house_prices.rst 32 | 33 | .. include:: ../../sklearn/datasets/descr/iris.rst 34 | 35 | .. include:: ../../sklearn/datasets/descr/diabetes.rst 36 | 37 | .. include:: ../../sklearn/datasets/descr/digits.rst 38 | 39 | .. include:: ../../sklearn/datasets/descr/linnerud.rst 40 | 41 | .. include:: ../../sklearn/datasets/descr/wine_data.rst 42 | 43 | .. include:: ../../sklearn/datasets/descr/breast_cancer.rst 44 | -------------------------------------------------------------------------------- /sklearn/utils/_arpack.py: -------------------------------------------------------------------------------- 1 | from .validation import check_random_state 2 | 3 | 4 | def _init_arpack_v0(size, random_state): 5 | """Initialize the starting vector for iteration in ARPACK functions. 6 | 7 | Initialize a ndarray with values sampled from the uniform distribution on 8 | [-1, 1]. This initialization model has been chosen to be consistent with 9 | the ARPACK one as another initialization can lead to convergence issues. 10 | 11 | Parameters 12 | ---------- 13 | size : int 14 | The size of the eigenvalue vector to be initialized. 15 | 16 | random_state : int, RandomState instance or None, default=None 17 | The seed of the pseudo random number generator used to generate a 18 | uniform distribution. 
If int, random_state is the seed used by the 19 | random number generator; If RandomState instance, random_state is the 20 | random number generator; If None, the random number generator is the 21 | RandomState instance used by `np.random`. 22 | 23 | Returns 24 | ------- 25 | v0 : ndarray of shape (size,) 26 | The initialized vector. 27 | """ 28 | random_state = check_random_state(random_state) 29 | v0 = random_state.uniform(-1, 1, size) 30 | return v0 31 | -------------------------------------------------------------------------------- /examples/svm/plot_svm_nonlinear.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============== 3 | Non-linear SVM 4 | ============== 5 | 6 | Perform binary classification using non-linear SVC 7 | with RBF kernel. The target to predict is a XOR of the 8 | inputs. 9 | 10 | The color map illustrates the decision function learned by the SVC. 11 | 12 | """ 13 | 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | from sklearn import svm 17 | 18 | xx, yy = np.meshgrid(np.linspace(-3, 3, 500), np.linspace(-3, 3, 500)) 19 | np.random.seed(0) 20 | X = np.random.randn(300, 2) 21 | Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0) 22 | 23 | # fit the model 24 | clf = svm.NuSVC(gamma="auto") 25 | clf.fit(X, Y) 26 | 27 | # plot the decision function for each datapoint on the grid 28 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) 29 | Z = Z.reshape(xx.shape) 30 | 31 | plt.imshow( 32 | Z, 33 | interpolation="nearest", 34 | extent=(xx.min(), xx.max(), yy.min(), yy.max()), 35 | aspect="auto", 36 | origin="lower", 37 | cmap=plt.cm.PuOr_r, 38 | ) 39 | contours = plt.contour(xx, yy, Z, levels=[0], linewidths=2, linestyles="dashed") 40 | plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired, edgecolors="k") 41 | plt.xticks(()) 42 | plt.yticks(()) 43 | plt.axis([-3, 3, -3, 3]) 44 | plt.show() 45 | -------------------------------------------------------------------------------- 
/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | # Minimum requirements for the build system to execute. 3 | requires = [ 4 | "setuptools", 5 | "wheel", 6 | "Cython>=0.28.5", 7 | 8 | # use oldest-supported-numpy which provides the oldest numpy version with 9 | # wheels on PyPI 10 | # 11 | # see: https://github.com/scipy/oldest-supported-numpy/blob/master/setup.cfg 12 | "oldest-supported-numpy; python_version!='3.7' or platform_machine=='aarch64' or platform_system=='AIX' or platform_python_implementation == 'PyPy'", 13 | 14 | # Override oldest-supported-numpy setting because pandas 0.25.0 requires 1.14.6 15 | "numpy==1.14.6; python_version=='3.7' and platform_machine!='aarch64' and platform_system!='AIX' and platform_python_implementation != 'PyPy'", 16 | 17 | "scipy>=1.1.0", 18 | ] 19 | 20 | [tool.black] 21 | line-length = 88 22 | target_version = ['py37', 'py38', 'py39'] 23 | experimental_string_processing = true 24 | exclude = ''' 25 | /( 26 | \.eggs # exclude a few common directories in the 27 | | \.git # root of the project 28 | | \.mypy_cache 29 | | \.vscode 30 | | build 31 | | dist 32 | | doc/tutorial 33 | | doc/_build 34 | | doc/auto_examples 35 | | sklearn/externals 36 | | asv_benchmarks/env 37 | )/ 38 | ''' 39 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_show_versions.py: -------------------------------------------------------------------------------- 1 | from sklearn.utils.fixes import threadpool_info 2 | from sklearn.utils._show_versions import _get_sys_info 3 | from sklearn.utils._show_versions import _get_deps_info 4 | from sklearn.utils._show_versions import show_versions 5 | from sklearn.utils._testing import ignore_warnings 6 | 7 | 8 | def test_get_sys_info(): 9 | sys_info = _get_sys_info() 10 | 11 | assert "python" in sys_info 12 | assert "executable" in sys_info 13 | assert "machine" in sys_info 14 | 15 | 16 | 
def test_get_deps_info(): 17 | with ignore_warnings(): 18 | deps_info = _get_deps_info() 19 | 20 | assert "pip" in deps_info 21 | assert "setuptools" in deps_info 22 | assert "sklearn" in deps_info 23 | assert "numpy" in deps_info 24 | assert "scipy" in deps_info 25 | assert "Cython" in deps_info 26 | assert "pandas" in deps_info 27 | assert "matplotlib" in deps_info 28 | assert "joblib" in deps_info 29 | 30 | 31 | def test_show_versions(capsys): 32 | with ignore_warnings(): 33 | show_versions() 34 | out, err = capsys.readouterr() 35 | 36 | assert "python" in out 37 | assert "numpy" in out 38 | 39 | info = threadpool_info() 40 | if info: 41 | assert "threadpoolctl info:" in out 42 | -------------------------------------------------------------------------------- /sklearn/decomposition/_cdnmf_fast.pyx: -------------------------------------------------------------------------------- 1 | # Author: Mathieu Blondel, Tom Dupre la Tour 2 | # License: BSD 3 clause 3 | 4 | from cython cimport floating 5 | from libc.math cimport fabs 6 | 7 | 8 | def _update_cdnmf_fast(floating[:, ::1] W, floating[:, :] HHt, 9 | floating[:, :] XHt, Py_ssize_t[::1] permutation): 10 | cdef: 11 | floating violation = 0 12 | Py_ssize_t n_components = W.shape[1] 13 | Py_ssize_t n_samples = W.shape[0] # n_features for H update 14 | floating grad, pg, hess 15 | Py_ssize_t i, r, s, t 16 | 17 | with nogil: 18 | for s in range(n_components): 19 | t = permutation[s] 20 | 21 | for i in range(n_samples): 22 | # gradient = GW[t, i] where GW = np.dot(W, HHt) - XHt 23 | grad = -XHt[i, t] 24 | 25 | for r in range(n_components): 26 | grad += HHt[t, r] * W[i, r] 27 | 28 | # projected gradient 29 | pg = min(0., grad) if W[i, t] == 0 else grad 30 | violation += fabs(pg) 31 | 32 | # Hessian 33 | hess = HHt[t, t] 34 | 35 | if hess != 0: 36 | W[i, t] = max(W[i, t] - grad / hess, 0.) 
37 | 38 | return violation 39 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | 6 | #### Reference Issues/PRs 7 | 13 | 14 | 15 | #### What does this implement/fix? Explain your changes. 16 | 17 | 18 | #### Any other comments? 19 | 20 | 21 | 33 | -------------------------------------------------------------------------------- /doc/includes/bigger_toc_css.rst: -------------------------------------------------------------------------------- 1 | .. 2 | File to ..include in a document with a very big table of content, to 3 | give it 'style' 4 | 5 | .. raw:: html 6 | 7 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /sklearn/covariance/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.covariance` module includes methods and algorithms to 3 | robustly estimate the covariance of features given a set of points. The 4 | precision matrix defined as the inverse of the covariance is also estimated. 5 | Covariance estimation is closely related to the theory of Gaussian Graphical 6 | Models. 
7 | """ 8 | 9 | from ._empirical_covariance import ( 10 | empirical_covariance, 11 | EmpiricalCovariance, 12 | log_likelihood, 13 | ) 14 | from ._shrunk_covariance import ( 15 | shrunk_covariance, 16 | ShrunkCovariance, 17 | ledoit_wolf, 18 | ledoit_wolf_shrinkage, 19 | LedoitWolf, 20 | oas, 21 | OAS, 22 | ) 23 | from ._robust_covariance import fast_mcd, MinCovDet 24 | from ._graph_lasso import graphical_lasso, GraphicalLasso, GraphicalLassoCV 25 | from ._elliptic_envelope import EllipticEnvelope 26 | 27 | 28 | __all__ = [ 29 | "EllipticEnvelope", 30 | "EmpiricalCovariance", 31 | "GraphicalLasso", 32 | "GraphicalLassoCV", 33 | "LedoitWolf", 34 | "MinCovDet", 35 | "OAS", 36 | "ShrunkCovariance", 37 | "empirical_covariance", 38 | "fast_mcd", 39 | "graphical_lasso", 40 | "ledoit_wolf", 41 | "ledoit_wolf_shrinkage", 42 | "log_likelihood", 43 | "oas", 44 | "shrunk_covariance", 45 | ] 46 | -------------------------------------------------------------------------------- /sklearn/tests/test_build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import textwrap 4 | 5 | from sklearn import __version__ 6 | from sklearn.utils._openmp_helpers import _openmp_parallelism_enabled 7 | 8 | 9 | def test_openmp_parallelism_enabled(): 10 | # Check that sklearn is built with OpenMP-based parallelism enabled. 11 | # This test can be skipped by setting the environment variable 12 | # ``SKLEARN_SKIP_OPENMP_TEST``. 13 | if os.getenv("SKLEARN_SKIP_OPENMP_TEST"): 14 | pytest.skip("test explicitly skipped (SKLEARN_SKIP_OPENMP_TEST)") 15 | 16 | base_url = "dev" if __version__.endswith(".dev0") else "stable" 17 | err_msg = textwrap.dedent( 18 | """ 19 | This test fails because scikit-learn has been built without OpenMP. 20 | This is not recommended since some estimators will run in sequential 21 | mode instead of leveraging thread-based parallelism. 
22 | 23 | You can find instructions to build scikit-learn with OpenMP at this 24 | address: 25 | 26 | https://scikit-learn.org/{}/developers/advanced_installation.html 27 | 28 | You can skip this test by setting the environment variable 29 | SKLEARN_SKIP_OPENMP_TEST to any value. 30 | """ 31 | ).format(base_url) 32 | 33 | assert _openmp_parallelism_enabled(), err_msg 34 | -------------------------------------------------------------------------------- /asv_benchmarks/benchmarks/neighbors.py: -------------------------------------------------------------------------------- 1 | from sklearn.neighbors import KNeighborsClassifier 2 | 3 | from .common import Benchmark, Estimator, Predictor 4 | from .datasets import _20newsgroups_lowdim_dataset 5 | from .utils import make_gen_classif_scorers 6 | 7 | 8 | class KNeighborsClassifierBenchmark(Predictor, Estimator, Benchmark): 9 | """ 10 | Benchmarks for KNeighborsClassifier. 11 | """ 12 | 13 | param_names = ["algorithm", "dimension", "n_jobs"] 14 | params = (["brute", "kd_tree", "ball_tree"], ["low", "high"], Benchmark.n_jobs_vals) 15 | 16 | def setup_cache(self): 17 | super().setup_cache() 18 | 19 | def make_data(self, params): 20 | algorithm, dimension, n_jobs = params 21 | 22 | if Benchmark.data_size == "large": 23 | n_components = 40 if dimension == "low" else 200 24 | else: 25 | n_components = 10 if dimension == "low" else 50 26 | 27 | data = _20newsgroups_lowdim_dataset(n_components=n_components) 28 | 29 | return data 30 | 31 | def make_estimator(self, params): 32 | algorithm, dimension, n_jobs = params 33 | 34 | estimator = KNeighborsClassifier(algorithm=algorithm, n_jobs=n_jobs) 35 | 36 | return estimator 37 | 38 | def make_scorers(self): 39 | make_gen_classif_scorers(self) 40 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/covtype.rst: -------------------------------------------------------------------------------- 1 | .. 
_covtype_dataset: 2 | 3 | Forest covertypes 4 | ----------------- 5 | 6 | The samples in this dataset correspond to 30×30m patches of forest in the US, 7 | collected for the task of predicting each patch's cover type, 8 | i.e. the dominant species of tree. 9 | There are seven covertypes, making this a multiclass classification problem. 10 | Each sample has 54 features, described on the 11 | `dataset's homepage <https://archive.ics.uci.edu/ml/datasets/Covertype>`__. 12 | Some of the features are boolean indicators, 13 | while others are discrete or continuous measurements. 14 | 15 | **Data Set Characteristics:** 16 | 17 | ================= ============ 18 | Classes 7 19 | Samples total 581012 20 | Dimensionality 54 21 | Features int 22 | ================= ============ 23 | 24 | :func:`sklearn.datasets.fetch_covtype` will load the covertype dataset; 25 | it returns a dictionary-like 'Bunch' object 26 | with the feature matrix in the ``data`` member 27 | and the target values in ``target``. If optional argument 'as_frame' is 28 | set to 'True', it will return ``data`` and ``target`` as pandas 29 | data frame, and there will be an additional member ``frame`` as well. 30 | The dataset will be downloaded from the web if necessary. 31 | -------------------------------------------------------------------------------- /examples/exercises/plot_cv_digits.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================================= 3 | Cross-validation on Digits Dataset Exercise 4 | ============================================= 5 | 6 | A tutorial exercise using Cross-validation with an SVM on the Digits dataset. 7 | 8 | This exercise is used in the :ref:`cv_generators_tut` part of the 9 | :ref:`model_selection_tut` section of the :ref:`stat_learn_tut_index`. 
10 | 11 | """ 12 | 13 | import numpy as np 14 | from sklearn.model_selection import cross_val_score 15 | from sklearn import datasets, svm 16 | 17 | X, y = datasets.load_digits(return_X_y=True) 18 | 19 | svc = svm.SVC(kernel="linear") 20 | C_s = np.logspace(-10, 0, 10) 21 | 22 | scores = list() 23 | scores_std = list() 24 | for C in C_s: 25 | svc.C = C 26 | this_scores = cross_val_score(svc, X, y, n_jobs=1) 27 | scores.append(np.mean(this_scores)) 28 | scores_std.append(np.std(this_scores)) 29 | 30 | # Do the plotting 31 | import matplotlib.pyplot as plt 32 | 33 | plt.figure() 34 | plt.semilogx(C_s, scores) 35 | plt.semilogx(C_s, np.array(scores) + np.array(scores_std), "b--") 36 | plt.semilogx(C_s, np.array(scores) - np.array(scores_std), "b--") 37 | locs, labels = plt.yticks() 38 | plt.yticks(locs, list(map(lambda x: "%g" % x, locs))) 39 | plt.ylabel("CV score") 40 | plt.xlabel("Parameter C") 41 | plt.ylim(0, 1.1) 42 | plt.show() 43 | -------------------------------------------------------------------------------- /examples/cluster/plot_kmeans_plusplus.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================================== 3 | An example of K-Means++ initialization 4 | =========================================================== 5 | 6 | An example to show the output of the :func:`sklearn.cluster.kmeans_plusplus` 7 | function for generating initial seeds for clustering. 8 | 9 | K-Means++ is used as the default initialization for :ref:`k_means`. 
10 | 11 | """ 12 | 13 | from sklearn.cluster import kmeans_plusplus 14 | from sklearn.datasets import make_blobs 15 | import matplotlib.pyplot as plt 16 | 17 | # Generate sample data 18 | n_samples = 4000 19 | n_components = 4 20 | 21 | X, y_true = make_blobs( 22 | n_samples=n_samples, centers=n_components, cluster_std=0.60, random_state=0 23 | ) 24 | X = X[:, ::-1] 25 | 26 | # Calculate seeds from kmeans++ 27 | centers_init, indices = kmeans_plusplus(X, n_clusters=4, random_state=0) 28 | 29 | # Plot init seeds along side sample data 30 | plt.figure(1) 31 | colors = ["#4EACC5", "#FF9C34", "#4E9A06", "m"] 32 | 33 | for k, col in enumerate(colors): 34 | cluster_data = y_true == k 35 | plt.scatter(X[cluster_data, 0], X[cluster_data, 1], c=col, marker=".", s=10) 36 | 37 | plt.scatter(centers_init[:, 0], centers_init[:, 1], c="b", s=50) 38 | plt.title("K-Means++ Initialization") 39 | plt.xticks([]) 40 | plt.yticks([]) 41 | plt.show() 42 | -------------------------------------------------------------------------------- /sklearn/utils/src/MurmurHash3.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | // domain. The author hereby disclaims copyright to this source code. 
4 | 5 | #ifndef _MURMURHASH3_H_ 6 | #define _MURMURHASH3_H_ 7 | 8 | //----------------------------------------------------------------------------- 9 | // Platform-specific functions and macros 10 | 11 | // Microsoft Visual Studio 12 | 13 | #if defined(_MSC_VER) 14 | 15 | typedef unsigned char uint8_t; 16 | typedef unsigned long uint32_t; 17 | typedef unsigned __int64 uint64_t; 18 | 19 | // Other compilers 20 | 21 | #else // defined(_MSC_VER) 22 | 23 | #include 24 | 25 | #endif // !defined(_MSC_VER) 26 | 27 | //----------------------------------------------------------------------------- 28 | #ifdef __cplusplus 29 | extern "C" { 30 | #endif 31 | 32 | 33 | void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); 34 | 35 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); 36 | 37 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | 43 | //----------------------------------------------------------------------------- 44 | 45 | #endif // _MURMURHASH3_H_ 46 | -------------------------------------------------------------------------------- /sklearn/experimental/enable_halving_search_cv.py: -------------------------------------------------------------------------------- 1 | """Enables Successive Halving search-estimators 2 | 3 | The API and results of these estimators might change without any deprecation 4 | cycle. 
5 | 6 | Importing this file dynamically sets the 7 | :class:`~sklearn.model_selection.HalvingRandomSearchCV` and 8 | :class:`~sklearn.model_selection.HalvingGridSearchCV` as attributes of the 9 | `model_selection` module:: 10 | 11 | >>> # explicitly require this experimental feature 12 | >>> from sklearn.experimental import enable_halving_search_cv # noqa 13 | >>> # now you can import normally from model_selection 14 | >>> from sklearn.model_selection import HalvingRandomSearchCV 15 | >>> from sklearn.model_selection import HalvingGridSearchCV 16 | 17 | 18 | The ``# noqa`` comment can be removed: it just tells linters like 19 | flake8 to ignore the import, which appears as unused. 20 | """ 21 | 22 | from ..model_selection._search_successive_halving import ( 23 | HalvingRandomSearchCV, 24 | HalvingGridSearchCV, 25 | ) 26 | 27 | from .. import model_selection 28 | 29 | # use setattr to avoid mypy errors when monkeypatching 30 | setattr(model_selection, "HalvingRandomSearchCV", HalvingRandomSearchCV) 31 | setattr(model_selection, "HalvingGridSearchCV", HalvingGridSearchCV) 32 | 33 | model_selection.__all__ += ["HalvingRandomSearchCV", "HalvingGridSearchCV"] 34 | -------------------------------------------------------------------------------- /maint_tools/sort_whats_new.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Sorts what's new entries with per-module headings. 3 | # Pass what's new entries on stdin. 
4 | 5 | import sys 6 | import re 7 | from collections import defaultdict 8 | 9 | LABEL_ORDER = ["MajorFeature", "Feature", "Enhancement", "Efficiency", "Fix", "API"] 10 | 11 | 12 | def entry_sort_key(s): 13 | if s.startswith("- |"): 14 | return LABEL_ORDER.index(s.split("|")[1]) 15 | else: 16 | return -1 17 | 18 | 19 | # discard headings and other non-entry lines 20 | text = "".join(l for l in sys.stdin if l.startswith("- ") or l.startswith(" ")) 21 | 22 | bucketed = defaultdict(list) 23 | 24 | for entry in re.split("\n(?=- )", text.strip()): 25 | modules = re.findall( 26 | r":(?:func|meth|mod|class):" r"`(?:[^<`]*<|~)?(?:sklearn.)?([a-z]\w+)", entry 27 | ) 28 | modules = set(modules) 29 | if len(modules) > 1: 30 | key = "Multiple modules" 31 | elif modules: 32 | key = ":mod:`sklearn.%s`" % next(iter(modules)) 33 | else: 34 | key = "Miscellaneous" 35 | bucketed[key].append(entry) 36 | entry = entry.strip() + "\n" 37 | 38 | everything = [] 39 | for key, bucket in sorted(bucketed.items()): 40 | everything.append(key + "\n" + "." 
* len(key)) 41 | bucket.sort(key=entry_sort_key) 42 | everything.extend(bucket) 43 | print("\n\n".join(everything)) 44 | -------------------------------------------------------------------------------- /sklearn/linear_model/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy 3 | 4 | from sklearn._build_utils import gen_from_templates 5 | 6 | 7 | def configuration(parent_package="", top_path=None): 8 | from numpy.distutils.misc_util import Configuration 9 | 10 | config = Configuration("linear_model", parent_package, top_path) 11 | 12 | libraries = [] 13 | if os.name == "posix": 14 | libraries.append("m") 15 | 16 | config.add_extension( 17 | "_cd_fast", 18 | sources=["_cd_fast.pyx"], 19 | include_dirs=numpy.get_include(), 20 | libraries=libraries, 21 | ) 22 | 23 | config.add_extension( 24 | "_sgd_fast", 25 | sources=["_sgd_fast.pyx"], 26 | include_dirs=numpy.get_include(), 27 | libraries=libraries, 28 | ) 29 | 30 | # generate sag_fast from template 31 | templates = ["sklearn/linear_model/_sag_fast.pyx.tp"] 32 | gen_from_templates(templates) 33 | 34 | config.add_extension( 35 | "_sag_fast", sources=["_sag_fast.pyx"], include_dirs=numpy.get_include() 36 | ) 37 | 38 | # add other directories 39 | config.add_subpackage("tests") 40 | config.add_subpackage("_glm") 41 | config.add_subpackage("_glm/tests") 42 | 43 | return config 44 | 45 | 46 | if __name__ == "__main__": 47 | from numpy.distutils.core import setup 48 | 49 | setup(**configuration(top_path="").todict()) 50 | -------------------------------------------------------------------------------- /sklearn/neighbors/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.neighbors` module implements the k-nearest neighbors 3 | algorithm. 
4 | """ 5 | 6 | from ._ball_tree import BallTree 7 | from ._kd_tree import KDTree 8 | from ._distance_metric import DistanceMetric 9 | from ._graph import kneighbors_graph, radius_neighbors_graph 10 | from ._graph import KNeighborsTransformer, RadiusNeighborsTransformer 11 | from ._unsupervised import NearestNeighbors 12 | from ._classification import KNeighborsClassifier, RadiusNeighborsClassifier 13 | from ._regression import KNeighborsRegressor, RadiusNeighborsRegressor 14 | from ._nearest_centroid import NearestCentroid 15 | from ._kde import KernelDensity 16 | from ._lof import LocalOutlierFactor 17 | from ._nca import NeighborhoodComponentsAnalysis 18 | from ._base import VALID_METRICS, VALID_METRICS_SPARSE 19 | 20 | __all__ = [ 21 | "BallTree", 22 | "DistanceMetric", 23 | "KDTree", 24 | "KNeighborsClassifier", 25 | "KNeighborsRegressor", 26 | "KNeighborsTransformer", 27 | "NearestCentroid", 28 | "NearestNeighbors", 29 | "RadiusNeighborsClassifier", 30 | "RadiusNeighborsRegressor", 31 | "RadiusNeighborsTransformer", 32 | "kneighbors_graph", 33 | "radius_neighbors_graph", 34 | "KernelDensity", 35 | "LocalOutlierFactor", 36 | "NeighborhoodComponentsAnalysis", 37 | "VALID_METRICS", 38 | "VALID_METRICS_SPARSE", 39 | ] 40 | -------------------------------------------------------------------------------- /examples/linear_model/plot_sgd_separating_hyperplane.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | SGD: Maximum margin separating hyperplane 4 | ========================================= 5 | 6 | Plot the maximum margin separating hyperplane within a two-class 7 | separable dataset using a linear Support Vector Machines classifier 8 | trained using SGD. 
9 | 10 | """ 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from sklearn.linear_model import SGDClassifier 15 | from sklearn.datasets import make_blobs 16 | 17 | # we create 50 separable points 18 | X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60) 19 | 20 | # fit the model 21 | clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200) 22 | 23 | clf.fit(X, Y) 24 | 25 | # plot the line, the points, and the nearest vectors to the plane 26 | xx = np.linspace(-1, 5, 10) 27 | yy = np.linspace(-1, 5, 10) 28 | 29 | X1, X2 = np.meshgrid(xx, yy) 30 | Z = np.empty(X1.shape) 31 | for (i, j), val in np.ndenumerate(X1): 32 | x1 = val 33 | x2 = X2[i, j] 34 | p = clf.decision_function([[x1, x2]]) 35 | Z[i, j] = p[0] 36 | levels = [-1.0, 0.0, 1.0] 37 | linestyles = ["dashed", "solid", "dashed"] 38 | colors = "k" 39 | plt.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles) 40 | plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, edgecolor="black", s=20) 41 | 42 | plt.axis("tight") 43 | plt.show() 44 | -------------------------------------------------------------------------------- /sklearn/tree/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | from numpy.distutils.misc_util import Configuration 5 | 6 | 7 | def configuration(parent_package="", top_path=None): 8 | config = Configuration("tree", parent_package, top_path) 9 | libraries = [] 10 | if os.name == "posix": 11 | libraries.append("m") 12 | config.add_extension( 13 | "_tree", 14 | sources=["_tree.pyx"], 15 | include_dirs=[numpy.get_include()], 16 | libraries=libraries, 17 | extra_compile_args=["-O3"], 18 | ) 19 | config.add_extension( 20 | "_splitter", 21 | sources=["_splitter.pyx"], 22 | include_dirs=[numpy.get_include()], 23 | libraries=libraries, 24 | extra_compile_args=["-O3"], 25 | ) 26 | config.add_extension( 27 | "_criterion", 28 | sources=["_criterion.pyx"], 29 | 
include_dirs=[numpy.get_include()], 30 | libraries=libraries, 31 | extra_compile_args=["-O3"], 32 | ) 33 | config.add_extension( 34 | "_utils", 35 | sources=["_utils.pyx"], 36 | include_dirs=[numpy.get_include()], 37 | libraries=libraries, 38 | extra_compile_args=["-O3"], 39 | ) 40 | 41 | config.add_subpackage("tests") 42 | 43 | return config 44 | 45 | 46 | if __name__ == "__main__": 47 | from numpy.distutils.core import setup 48 | 49 | setup(**configuration().todict()) 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.so 3 | *.pyd 4 | *~ 5 | .#* 6 | *.lprof 7 | *.swp 8 | *.swo 9 | .DS_Store 10 | build 11 | sklearn/datasets/__config__.py 12 | sklearn/**/*.html 13 | 14 | dist/ 15 | MANIFEST 16 | doc/_build/ 17 | doc/auto_examples/ 18 | doc/modules/generated/ 19 | doc/datasets/generated/ 20 | doc/min_dependency_table.rst 21 | doc/min_dependency_substitutions.rst 22 | *.pdf 23 | pip-log.txt 24 | scikit_learn.egg-info/ 25 | .coverage 26 | coverage 27 | *.py,cover 28 | .tags* 29 | tags 30 | covtype.data.gz 31 | 20news-18828/ 32 | 20news-18828.tar.gz 33 | coverages.zip 34 | samples.zip 35 | doc/coverages.zip 36 | doc/samples.zip 37 | coverages 38 | samples 39 | doc/coverages 40 | doc/samples 41 | *.prof 42 | .tox/ 43 | .coverage 44 | pip-wheel-metadata 45 | 46 | lfw_preprocessed/ 47 | nips2010_pdf/ 48 | 49 | *.nt.bz2 50 | *.tar.gz 51 | *.tgz 52 | 53 | examples/cluster/joblib 54 | reuters/ 55 | benchmarks/bench_covertype_data/ 56 | 57 | *.prefs 58 | .pydevproject 59 | .idea 60 | .vscode 61 | 62 | *.c 63 | *.cpp 64 | 65 | !/**/src/**/*.c 66 | !/**/src/**/*.cpp 67 | *.sln 68 | *.pyproj 69 | 70 | # Used by py.test 71 | .cache 72 | .pytest_cache/ 73 | _configtest.o.d 74 | 75 | # Used by mypy 76 | .mypy_cache/ 77 | 78 | # files generated from a template 79 | sklearn/utils/_seq_dataset.pyx 80 | sklearn/utils/_seq_dataset.pxd 81 | 
sklearn/utils/_weight_vector.pyx 82 | sklearn/utils/_weight_vector.pxd 83 | sklearn/linear_model/_sag_fast.pyx 84 | -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/common.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Y_DTYPE is the dtype to which the targets y are converted. This is also 4 | # the dtype for leaf values, gains, and sums of gradients / hessians. The gradients 5 | # and hessians arrays are stored as floats to avoid using too much memory. 6 | Y_DTYPE = np.float64 7 | X_DTYPE = np.float64 8 | X_BINNED_DTYPE = np.uint8 # hence max_bins == 256 9 | # dtype for gradients and hessians arrays 10 | G_H_DTYPE = np.float32 11 | X_BITSET_INNER_DTYPE = np.uint32 12 | 13 | HISTOGRAM_DTYPE = np.dtype([ 14 | ('sum_gradients', Y_DTYPE), # sum of sample gradients in bin 15 | ('sum_hessians', Y_DTYPE), # sum of sample hessians in bin 16 | ('count', np.uint32), # number of samples in bin 17 | ]) 18 | 19 | PREDICTOR_RECORD_DTYPE = np.dtype([ 20 | ('value', Y_DTYPE), 21 | ('count', np.uint32), 22 | ('feature_idx', np.uint32), 23 | ('num_threshold', X_DTYPE), 24 | ('missing_go_to_left', np.uint8), 25 | ('left', np.uint32), 26 | ('right', np.uint32), 27 | ('gain', Y_DTYPE), 28 | ('depth', np.uint32), 29 | ('is_leaf', np.uint8), 30 | ('bin_threshold', X_BINNED_DTYPE), 31 | ('is_categorical', np.uint8), 32 | # The index of the corresponding bitsets in the Predictor's bitset arrays. 33 | # Only used if is_categorical is True 34 | ('bitset_idx', np.uint32) 35 | ]) 36 | 37 | ALMOST_INF = 1e300 # see LightGBM AvoidInf() 38 | -------------------------------------------------------------------------------- /doc/modules/isotonic.rst: -------------------------------------------------------------------------------- 1 | .. _isotonic: 2 | 3 | =================== 4 | Isotonic regression 5 | =================== 6 | 7 | .. 
currentmodule:: sklearn.isotonic 8 | 9 | The class :class:`IsotonicRegression` fits a non-decreasing real function to 10 | 1-dimensional data. It solves the following problem: 11 | 12 | minimize :math:`\sum_i w_i (y_i - \hat{y}_i)^2` 13 | 14 | subject to :math:`\hat{y}_i \le \hat{y}_j` whenever :math:`X_i \le X_j`, 15 | 16 | where the weights :math:`w_i` are strictly positive, and both `X` and `y` are 17 | arbitrary real quantities. 18 | 19 | The `increasing` parameter changes the constraint to 20 | :math:`\hat{y}_i \ge \hat{y}_j` whenever :math:`X_i \le X_j`. Setting it to 21 | 'auto' will automatically choose the constraint based on `Spearman's rank 22 | correlation coefficient 23 | <https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_. 24 | 25 | :class:`IsotonicRegression` produces a series of predictions 26 | :math:`\hat{y}_i` for the training data which are the closest to the targets 27 | :math:`y` in terms of mean squared error. These predictions are interpolated 28 | for predicting to unseen data. The predictions of :class:`IsotonicRegression` 29 | thus form a function that is piecewise linear: 30 | 31 | .. figure:: ../auto_examples/miscellaneous/images/sphx_glr_plot_isotonic_regression_001.png 32 | :target: ../auto_examples/miscellaneous/plot_isotonic_regression.html 33 | :align: center 34 | -------------------------------------------------------------------------------- /examples/linear_model/plot_sgd_loss_functions.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================== 3 | SGD: convex loss functions 4 | ========================== 5 | 6 | A plot that compares the various convex loss functions supported by 7 | :class:`~sklearn.linear_model.SGDClassifier` . 
8 | 9 | """ 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | def modified_huber_loss(y_true, y_pred): 16 | z = y_pred * y_true 17 | loss = -4 * z 18 | loss[z >= -1] = (1 - z[z >= -1]) ** 2 19 | loss[z >= 1.0] = 0 20 | return loss 21 | 22 | 23 | xmin, xmax = -4, 4 24 | xx = np.linspace(xmin, xmax, 100) 25 | lw = 2 26 | plt.plot([xmin, 0, 0, xmax], [1, 1, 0, 0], color="gold", lw=lw, label="Zero-one loss") 27 | plt.plot(xx, np.where(xx < 1, 1 - xx, 0), color="teal", lw=lw, label="Hinge loss") 28 | plt.plot(xx, -np.minimum(xx, 0), color="yellowgreen", lw=lw, label="Perceptron loss") 29 | plt.plot(xx, np.log2(1 + np.exp(-xx)), color="cornflowerblue", lw=lw, label="Log loss") 30 | plt.plot( 31 | xx, 32 | np.where(xx < 1, 1 - xx, 0) ** 2, 33 | color="orange", 34 | lw=lw, 35 | label="Squared hinge loss", 36 | ) 37 | plt.plot( 38 | xx, 39 | modified_huber_loss(xx, 1), 40 | color="darkorchid", 41 | lw=lw, 42 | linestyle="--", 43 | label="Modified Huber loss", 44 | ) 45 | plt.ylim((0, 8)) 46 | plt.legend(loc="upper right") 47 | plt.xlabel(r"Decision function $f(x)$") 48 | plt.ylabel("$L(y=1, f(x))$") 49 | plt.show() 50 | -------------------------------------------------------------------------------- /sklearn/linear_model/_glm/tests/test_link.py: -------------------------------------------------------------------------------- 1 | # Authors: Christian Lorentzen 2 | # 3 | # License: BSD 3 clause 4 | import numpy as np 5 | from numpy.testing import assert_allclose 6 | import pytest 7 | from scipy.optimize import check_grad 8 | 9 | from sklearn.linear_model._glm.link import ( 10 | IdentityLink, 11 | LogLink, 12 | LogitLink, 13 | ) 14 | 15 | 16 | LINK_FUNCTIONS = [IdentityLink, LogLink, LogitLink] 17 | 18 | 19 | @pytest.mark.parametrize("Link", LINK_FUNCTIONS) 20 | def test_link_properties(Link): 21 | """Test link inverse and derivative.""" 22 | rng = np.random.RandomState(42) 23 | x = rng.rand(100) * 100 24 | link = Link() 25 | if isinstance(link, 
LogitLink): 26 | # careful for large x, note expit(36) = 1 27 | # limit max eta to 15 28 | x = x / 100 * 15 29 | assert_allclose(link(link.inverse(x)), x) 30 | # if g(h(x)) = x, then g'(h(x)) = 1/h'(x) 31 | # g = link, h = link.inverse 32 | assert_allclose(link.derivative(link.inverse(x)), 1 / link.inverse_derivative(x)) 33 | 34 | 35 | @pytest.mark.parametrize("Link", LINK_FUNCTIONS) 36 | def test_link_derivative(Link): 37 | link = Link() 38 | x = np.random.RandomState(0).rand(1) 39 | err = check_grad(link, link.derivative, x) / link.derivative(x) 40 | assert abs(err) < 1e-6 41 | 42 | err = check_grad(link.inverse, link.inverse_derivative, x) / link.derivative(x) 43 | assert abs(err) < 1e-6 44 | -------------------------------------------------------------------------------- /sklearn/decomposition/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.decomposition` module includes matrix decomposition 3 | algorithms, including among others PCA, NMF or ICA. Most of the algorithms of 4 | this module can be regarded as dimensionality reduction techniques. 
5 | """ 6 | 7 | 8 | from ._nmf import NMF, non_negative_factorization 9 | from ._pca import PCA 10 | from ._incremental_pca import IncrementalPCA 11 | from ._kernel_pca import KernelPCA 12 | from ._sparse_pca import SparsePCA, MiniBatchSparsePCA 13 | from ._truncated_svd import TruncatedSVD 14 | from ._fastica import FastICA, fastica 15 | from ._dict_learning import ( 16 | dict_learning, 17 | dict_learning_online, 18 | sparse_encode, 19 | DictionaryLearning, 20 | MiniBatchDictionaryLearning, 21 | SparseCoder, 22 | ) 23 | from ._factor_analysis import FactorAnalysis 24 | from ..utils.extmath import randomized_svd 25 | from ._lda import LatentDirichletAllocation 26 | 27 | 28 | __all__ = [ 29 | "DictionaryLearning", 30 | "FastICA", 31 | "IncrementalPCA", 32 | "KernelPCA", 33 | "MiniBatchDictionaryLearning", 34 | "MiniBatchSparsePCA", 35 | "NMF", 36 | "PCA", 37 | "SparseCoder", 38 | "SparsePCA", 39 | "dict_learning", 40 | "dict_learning_online", 41 | "fastica", 42 | "non_negative_factorization", 43 | "randomized_svd", 44 | "sparse_encode", 45 | "FactorAnalysis", 46 | "TruncatedSVD", 47 | "LatentDirichletAllocation", 48 | ] 49 | -------------------------------------------------------------------------------- /benchmarks/bench_plot_ward.py: -------------------------------------------------------------------------------- 1 | """ 2 | Benchmark scikit-learn's Ward implementation compared to SciPy's 3 | """ 4 | 5 | import time 6 | 7 | import numpy as np 8 | from scipy.cluster import hierarchy 9 | import matplotlib.pyplot as plt 10 | 11 | from sklearn.cluster import AgglomerativeClustering 12 | 13 | ward = AgglomerativeClustering(n_clusters=3, linkage="ward") 14 | 15 | n_samples = np.logspace(0.5, 3, 9) 16 | n_features = np.logspace(1, 3.5, 7) 17 | N_samples, N_features = np.meshgrid(n_samples, n_features) 18 | scikits_time = np.zeros(N_samples.shape) 19 | scipy_time = np.zeros(N_samples.shape) 20 | 21 | for i, n in enumerate(n_samples): 22 | for j, p in enumerate(n_features): 23 
| X = np.random.normal(size=(n, p)) 24 | t0 = time.time() 25 | ward.fit(X) 26 | scikits_time[j, i] = time.time() - t0 27 | t0 = time.time() 28 | hierarchy.ward(X) 29 | scipy_time[j, i] = time.time() - t0 30 | 31 | ratio = scikits_time / scipy_time 32 | 33 | plt.figure("scikit-learn Ward's method benchmark results") 34 | plt.imshow(np.log(ratio), aspect="auto", origin="lower") 35 | plt.colorbar() 36 | plt.contour( 37 | ratio, 38 | levels=[ 39 | 1, 40 | ], 41 | colors="k", 42 | ) 43 | plt.yticks(range(len(n_features)), n_features.astype(int)) 44 | plt.ylabel("N features") 45 | plt.xticks(range(len(n_samples)), n_samples.astype(int)) 46 | plt.xlabel("N samples") 47 | plt.title("Scikit's time, in units of scipy time (log)") 48 | plt.show() 49 | -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/common.pxd: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | np.import_array() 5 | 6 | 7 | ctypedef np.npy_float64 X_DTYPE_C 8 | ctypedef np.npy_uint8 X_BINNED_DTYPE_C 9 | ctypedef np.npy_float64 Y_DTYPE_C 10 | ctypedef np.npy_float32 G_H_DTYPE_C 11 | ctypedef np.npy_uint32 BITSET_INNER_DTYPE_C 12 | ctypedef BITSET_INNER_DTYPE_C[8] BITSET_DTYPE_C 13 | 14 | cdef packed struct hist_struct: 15 | # Same as histogram dtype but we need a struct to declare views. It needs 16 | # to be packed since by default numpy dtypes aren't aligned 17 | Y_DTYPE_C sum_gradients 18 | Y_DTYPE_C sum_hessians 19 | unsigned int count 20 | 21 | 22 | cdef packed struct node_struct: 23 | # Equivalent struct to PREDICTOR_RECORD_DTYPE to use in memory views. 
It 24 | # needs to be packed since by default numpy dtypes aren't aligned 25 | Y_DTYPE_C value 26 | unsigned int count 27 | unsigned int feature_idx 28 | X_DTYPE_C num_threshold 29 | unsigned char missing_go_to_left 30 | unsigned int left 31 | unsigned int right 32 | Y_DTYPE_C gain 33 | unsigned int depth 34 | unsigned char is_leaf 35 | X_BINNED_DTYPE_C bin_threshold 36 | unsigned char is_categorical 37 | # The index of the corresponding bitsets in the Predictor's bitset arrays. 38 | # Only used if is_categorical is True 39 | unsigned int bitset_idx 40 | 41 | cpdef enum MonotonicConstraint: 42 | NO_CST = 0 43 | POS = 1 44 | NEG = -1 45 | --------------------------------------------------------------------------------