├── sklearn
    ├── _loss
    │   ├── __init__.py
    │   └── tests
    │   │   └── __init__.py
    ├── tests
    │   ├── __init__.py
    │   ├── test_check_build.py
    │   ├── test_init.py
    │   └── test_build.py
    ├── cluster
    │   ├── tests
    │   │   ├── __init__.py
    │   │   └── common.py
    │   └── _k_means_common.pxd
    ├── compose
    │   ├── tests
    │   │   └── __init__.py
    │   └── __init__.py
    ├── datasets
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── digits.csv.gz
    │   │   ├── diabetes_data.csv.gz
    │   │   ├── diabetes_target.csv.gz
    │   │   ├── linnerud_exercise.csv
    │   │   └── linnerud_physiological.csv
    │   ├── descr
    │   │   ├── __init__.py
    │   │   ├── linnerud.rst
    │   │   └── covtype.rst
    │   ├── images
    │   │   ├── __init__.py
    │   │   ├── china.jpg
    │   │   ├── flower.jpg
    │   │   └── README.txt
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── data
    │   │   │   ├── __init__.py
    │   │   │   ├── openml
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── id_1
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-1.json.gz
    │   │   │   │   │   ├── api-v1-jdf-1.json.gz
    │   │   │   │   │   ├── api-v1-jdq-1.json.gz
    │   │   │   │   │   └── data-v1-dl-1.arff.gz
    │   │   │   │   ├── id_2
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-2.json.gz
    │   │   │   │   │   ├── api-v1-jdf-2.json.gz
    │   │   │   │   │   ├── api-v1-jdq-2.json.gz
    │   │   │   │   │   ├── data-v1-dl-1666876.arff.gz
    │   │   │   │   │   ├── api-v1-jdl-dn-anneal-l-2-dv-1.json.gz
    │   │   │   │   │   └── api-v1-jdl-dn-anneal-l-2-s-act-.json.gz
    │   │   │   │   ├── id_292
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-292.json.gz
    │   │   │   │   │   ├── api-v1-jd-40981.json.gz
    │   │   │   │   │   ├── api-v1-jdf-292.json.gz
    │   │   │   │   │   ├── api-v1-jdf-40981.json.gz
    │   │   │   │   │   ├── data-v1-dl-49822.arff.gz
    │   │   │   │   │   ├── api-v1-jdl-dn-australian-l-2-dv-1.json.gz
    │   │   │   │   │   ├── api-v1-jdl-dn-australian-l-2-s-act-.json.gz
    │   │   │   │   │   └── api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz
    │   │   │   │   ├── id_3
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-3.json.gz
    │   │   │   │   │   ├── api-v1-jdf-3.json.gz
    │   │   │   │   │   ├── api-v1-jdq-3.json.gz
    │   │   │   │   │   └── data-v1-dl-3.arff.gz
    │   │   │   │   ├── id_561
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-561.json.gz
    │   │   │   │   │   ├── api-v1-jdf-561.json.gz
    │   │   │   │   │   ├── api-v1-jdq-561.json.gz
    │   │   │   │   │   ├── data-v1-dl-52739.arff.gz
    │   │   │   │   │   ├── api-v1-jdl-dn-cpu-l-2-dv-1.json.gz
    │   │   │   │   │   └── api-v1-jdl-dn-cpu-l-2-s-act-.json.gz
    │   │   │   │   ├── id_61
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-61.json.gz
    │   │   │   │   │   ├── api-v1-jdf-61.json.gz
    │   │   │   │   │   ├── api-v1-jdq-61.json.gz
    │   │   │   │   │   ├── data-v1-dl-61.arff.gz
    │   │   │   │   │   ├── api-v1-jdl-dn-iris-l-2-dv-1.json.gz
    │   │   │   │   │   └── api-v1-jdl-dn-iris-l-2-s-act-.json.gz
    │   │   │   │   ├── id_62
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-62.json.gz
    │   │   │   │   │   ├── api-v1-jdf-62.json.gz
    │   │   │   │   │   ├── api-v1-jdq-62.json.gz
    │   │   │   │   │   └── data-v1-dl-52352.arff.gz
    │   │   │   │   ├── id_1119
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-1119.json.gz
    │   │   │   │   │   ├── api-v1-jdf-1119.json.gz
    │   │   │   │   │   ├── api-v1-jdq-1119.json.gz
    │   │   │   │   │   ├── data-v1-dl-54002.arff.gz
    │   │   │   │   │   ├── api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz
    │   │   │   │   │   └── api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz
    │   │   │   │   ├── id_40589
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-40589.json.gz
    │   │   │   │   │   ├── api-v1-jdf-40589.json.gz
    │   │   │   │   │   ├── api-v1-jdq-40589.json.gz
    │   │   │   │   │   ├── data-v1-dl-4644182.arff.gz
    │   │   │   │   │   ├── api-v1-jdl-dn-emotions-l-2-dv-3.json.gz
    │   │   │   │   │   └── api-v1-jdl-dn-emotions-l-2-s-act-.json.gz
    │   │   │   │   ├── id_40675
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-40675.json.gz
    │   │   │   │   │   ├── api-v1-jdf-40675.json.gz
    │   │   │   │   │   ├── api-v1-jdq-40675.json.gz
    │   │   │   │   │   ├── data-v1-dl-4965250.arff.gz
    │   │   │   │   │   ├── api-v1-jdl-dn-glass2-l-2-dv-1.json.gz
    │   │   │   │   │   ├── api-v1-jdl-dn-glass2-l-2-s-act-.json.gz
    │   │   │   │   │   └── api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz
    │   │   │   │   ├── id_40945
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-40945.json.gz
    │   │   │   │   │   ├── api-v1-jdf-40945.json.gz
    │   │   │   │   │   ├── api-v1-jdq-40945.json.gz
    │   │   │   │   │   └── data-v1-dl-16826755.arff.gz
    │   │   │   │   ├── id_40966
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-40966.json.gz
    │   │   │   │   │   ├── api-v1-jdf-40966.json.gz
    │   │   │   │   │   ├── api-v1-jdq-40966.json.gz
    │   │   │   │   │   ├── data-v1-dl-17928620.arff.gz
    │   │   │   │   │   ├── api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz
    │   │   │   │   │   └── api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz
    │   │   │   │   └── id_42585
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── api-v1-jd-42585.json.gz
    │   │   │   │   │   ├── api-v1-jdf-42585.json.gz
    │   │   │   │   │   ├── api-v1-jdq-42585.json.gz
    │   │   │   │   │   └── data-v1-dl-21854866.arff.gz
    │   │   │   ├── svmlight_invalid_order.txt
    │   │   │   ├── svmlight_invalid.txt
    │   │   │   ├── svmlight_multilabel.txt
    │   │   │   └── svmlight_classification.txt
    │   │   ├── conftest.py
    │   │   └── test_olivetti_faces.py
    │   └── setup.py
    ├── impute
    │   ├── tests
    │   │   └── __init__.py
    │   └── __init__.py
    ├── metrics
    │   ├── _plot
    │   │   ├── __init__.py
    │   │   └── tests
    │   │   │   └── __init__.py
    │   ├── tests
    │   │   └── __init__.py
    │   ├── cluster
    │   │   ├── tests
    │   │   │   └── __init__.py
    │   │   └── setup.py
    │   └── setup.py
    ├── mixture
    │   ├── tests
    │   │   ├── __init__.py
    │   │   └── test_mixture.py
    │   └── __init__.py
    ├── svm
    │   ├── tests
    │   │   └── __init__.py
    │   ├── src
    │   │   ├── libsvm
    │   │   │   ├── libsvm_template.cpp
    │   │   │   ├── _svm_cython_blas_helpers.h
    │   │   │   └── LIBSVM_CHANGES
    │   │   └── liblinear
    │   │   │   ├── _cython_blas_helpers.h
    │   │   │   └── tron.h
    │   ├── _newrand.pyx
    │   └── __init__.py
    ├── tree
    │   ├── tests
    │   │   └── __init__.py
    │   ├── __init__.py
    │   └── setup.py
    ├── utils
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── conftest.py
    │   │   ├── test_arpack.py
    │   │   ├── test_weight_vector.py
    │   │   ├── test_arrayfuncs.py
    │   │   ├── test_optimize.py
    │   │   ├── test_cython_templating.py
    │   │   ├── test_fast_dict.py
    │   │   ├── test_parallel.py
    │   │   └── test_show_versions.py
    │   ├── _typedefs.pxd
    │   ├── _fast_dict.pxd
    │   ├── _joblib.py
    │   ├── _logistic_sigmoid.pyx
    │   ├── _typedefs.pyx
    │   ├── murmurhash.pxd
    │   ├── _arpack.py
    │   └── src
    │   │   └── MurmurHash3.h
    ├── covariance
    │   ├── tests
    │   │   └── __init__.py
    │   └── __init__.py
    ├── ensemble
    │   ├── tests
    │   │   └── __init__.py
    │   └── _hist_gradient_boosting
    │   │   ├── tests
    │   │       └── __init__.py
    │   │   ├── __init__.py
    │   │   ├── _bitset.pxd
    │   │   ├── common.pyx
    │   │   └── common.pxd
    ├── experimental
    │   ├── tests
    │   │   ├── __init__.py
    │   │   └── test_enable_hist_gradient_boosting.py
    │   ├── __init__.py
    │   ├── enable_iterative_imputer.py
    │   ├── enable_hist_gradient_boosting.py
    │   └── enable_halving_search_cv.py
    ├── inspection
    │   ├── _plot
    │   │   ├── __init__.py
    │   │   └── tests
    │   │   │   └── __init__.py
    │   ├── tests
    │   │   └── __init__.py
    │   ├── setup.py
    │   └── __init__.py
    ├── linear_model
    │   ├── tests
    │   │   └── __init__.py
    │   ├── _glm
    │   │   ├── tests
    │   │   │   ├── __init__.py
    │   │   │   └── test_link.py
    │   │   └── __init__.py
    │   ├── _sgd_fast_helpers.h
    │   ├── _sgd_fast.pxd
    │   └── setup.py
    ├── manifold
    │   ├── tests
    │   │   └── __init__.py
    │   ├── __init__.py
    │   └── setup.py
    ├── neighbors
    │   ├── tests
    │   │   ├── __init__.py
    │   │   └── test_kd_tree.py
    │   ├── _partition_nodes.pxd
    │   ├── _distance_metric.py
    │   ├── setup.py
    │   └── __init__.py
    ├── decomposition
    │   ├── tests
    │   │   └── __init__.py
    │   ├── setup.py
    │   ├── _cdnmf_fast.pyx
    │   └── __init__.py
    ├── externals
    │   ├── _packaging
    │   │   └── __init__.py
    │   ├── __init__.py
    │   ├── README
    │   └── conftest.py
    ├── feature_selection
    │   └── tests
    │   │   └── __init__.py
    ├── gaussian_process
    │   ├── tests
    │   │   └── __init__.py
    │   └── __init__.py
    ├── model_selection
    │   └── tests
    │   │   ├── __init__.py
    │   │   └── common.py
    ├── neural_network
    │   ├── tests
    │   │   ├── __init__.py
    │   │   └── test_base.py
    │   └── __init__.py
    ├── preprocessing
    │   ├── tests
    │   │   └── __init__.py
    │   └── setup.py
    ├── semi_supervised
    │   ├── tests
    │   │   └── __init__.py
    │   └── __init__.py
    ├── cross_decomposition
    │   ├── tests
    │   │   └── __init__.py
    │   └── __init__.py
    ├── feature_extraction
    │   ├── tests
    │   │   └── __init__.py
    │   ├── __init__.py
    │   └── setup.py
    ├── __check_build
    │   ├── _check_build.pyx
    │   └── setup.py
    └── _distributor_init.py
├── doc
    ├── testimonials
    │   ├── images
    │   │   ├── Makefile
    │   │   ├── inria.png
    │   │   ├── mars.png
    │   │   ├── yhat.png
    │   │   ├── zopa.png
    │   │   ├── aweber.png
    │   │   ├── booking.png
    │   │   ├── infonea.jpg
    │   │   ├── lovely.png
    │   │   ├── okcupid.png
    │   │   ├── phimeca.png
    │   │   ├── spotify.png
    │   │   ├── betaworks.png
    │   │   ├── birchbox.jpg
    │   │   ├── datarobot.png
    │   │   ├── evernote.png
    │   │   ├── howaboutwe.png
    │   │   ├── jpmorgan.png
    │   │   ├── machinalis.png
    │   │   ├── peerindex.png
    │   │   ├── rangespan.png
    │   │   ├── change-logo.png
    │   │   ├── dataiku_logo.png
    │   │   ├── datapublica.png
    │   │   ├── huggingface.png
    │   │   ├── solido_logo.png
    │   │   ├── ottogroup_logo.png
    │   │   ├── bestofmedia-logo.png
    │   │   ├── telecomparistech.jpg
    │   │   └── bnp_paribas_cardif.png
    │   └── README.txt
    ├── sphinxext
    │   └── MANIFEST.in
    ├── images
    │   ├── axa.png
    │   ├── bcg.png
    │   ├── bnp.png
    │   ├── dysco.png
    │   ├── intel.png
    │   ├── iris.pdf
    │   ├── anaconda.png
    │   ├── cds-logo.png
    │   ├── columbia.png
    │   ├── dataiku.png
    │   ├── fujitsu.png
    │   ├── ml_map.png
    │   ├── no_image.png
    │   ├── nvidia.png
    │   ├── telecom.png
    │   ├── axa-small.png
    │   ├── bcg-small.png
    │   ├── bnp-small.png
    │   ├── digicosme.png
    │   ├── inria-logo.jpg
    │   ├── last_digit.png
    │   ├── logo_APHP.png
    │   ├── microsoft.png
    │   ├── rbm_graph.png
    │   ├── dataiku-small.png
    │   ├── fujitsu-small.png
    │   ├── google-small.png
    │   ├── inria-small.png
    │   ├── intel-small.png
    │   ├── nvidia-small.png
    │   ├── sloan_banner.png
    │   ├── telecom-small.png
    │   ├── zalando_logo.png
    │   ├── columbia-small.png
    │   ├── fnrs-logo-small.png
    │   ├── lda_model_graph.png
    │   ├── logo_APHP_text.png
    │   ├── microsoft-small.png
    │   ├── multi_org_chart.png
    │   ├── nyu_short_color.png
    │   ├── quansight-labs.png
    │   ├── sydney-primary.jpeg
    │   ├── sloan_logo-small.png
    │   ├── zalando_logo-small.png
    │   ├── grid_search_workflow.png
    │   ├── quansight-labs-small.png
    │   ├── sydney-stacked-small.png
    │   ├── plot_face_recognition_1.png
    │   ├── plot_face_recognition_2.png
    │   ├── scikit-learn-logo-small.png
    │   ├── scikit-learn-logo-notext.png
    │   ├── grid_search_cross_validation.png
    │   ├── multilayerperceptron_network.png
    │   ├── plot_digits_classification.png
    │   └── png-logo-inria-la-fondation.png
    ├── logos
    │   ├── favicon.ico
    │   ├── identity.pdf
    │   ├── scikit-learn-logo.bmp
    │   ├── scikit-learn-logo.png
    │   ├── scikit-learn-logo-notext.png
    │   ├── scikit-learn-logo-small.png
    │   └── scikit-learn-logo-thumb.png
    ├── modules
    │   ├── glm_data
    │   │   ├── lasso_enet_coordinate_descent.png
    │   │   └── poisson_gamma_tweedie_distributions.png
    │   ├── pipeline.rst
    │   └── isotonic.rst
    ├── themes
    │   └── scikit-learn-modern
    │   │   ├── theme.conf
    │   │   └── search.html
    ├── tutorial
    │   ├── common_includes
    │   │   └── info.txt
    │   ├── text_analytics
    │   │   ├── .gitignore
    │   │   ├── data
    │   │   │   ├── movie_reviews
    │   │   │   │   └── fetch_data.py
    │   │   │   └── twenty_newsgroups
    │   │   │   │   └── fetch_data.py
    │   │   └── solutions
    │   │   │   └── generate_skeletons.py
    │   └── index.rst
    ├── templates
    │   ├── generate_deprecated.sh
    │   ├── class.rst
    │   ├── function.rst
    │   ├── numpydoc_docstring.rst
    │   ├── class_with_call.rst
    │   ├── deprecated_function.rst
    │   ├── deprecated_class_without_init.rst
    │   ├── deprecated_class.rst
    │   ├── deprecated_class_with_call.rst
    │   └── redirects.html
    ├── README.md
    ├── binder
    │   └── requirements.txt
    ├── computing.rst
    ├── model_selection.rst
    ├── developers
    │   └── index.rst
    ├── contents.rst
    ├── unsupervised_learning.rst
    ├── communication_team.rst
    ├── whats_new
    │   └── changelog_legend.inc
    ├── user_guide.rst
    ├── preface.rst
    ├── authors_emeritus.rst
    ├── supervised_learning.rst
    ├── includes
    │   ├── big_toc_css.rst
    │   └── bigger_toc_css.rst
    ├── whats_new.rst
    ├── datasets
    │   ├── real_world.rst
    │   └── toy_dataset.rst
    └── inspection.rst
├── .circleci
    └── artifact_path
├── .gitattributes
├── benchmarks
    ├── .gitignore
    ├── plot_tsne_mnist.py
    └── bench_plot_ward.py
├── examples
    ├── README.txt
    ├── exercises
    │   ├── README.txt
    │   ├── plot_digits_classification_exercise.py
    │   └── plot_cv_digits.py
    ├── cluster
    │   ├── README.txt
    │   └── plot_kmeans_plusplus.py
    ├── tree
    │   └── README.txt
    ├── ensemble
    │   └── README.txt
    ├── inspection
    │   └── README.txt
    ├── svm
    │   ├── README.txt
    │   └── plot_svm_nonlinear.py
    ├── bicluster
    │   └── README.txt
    ├── datasets
    │   ├── README.txt
    │   └── plot_digits_last_image.py
    ├── classification
    │   └── README.txt
    ├── impute
    │   └── README.txt
    ├── miscellaneous
    │   ├── README.txt
    │   └── plot_changed_only_pprint_parameter.py
    ├── mixture
    │   └── README.txt
    ├── neighbors
    │   └── README.txt
    ├── preprocessing
    │   └── README.txt
    ├── covariance
    │   └── README.txt
    ├── decomposition
    │   ├── README.txt
    │   └── plot_beta_divergence.py
    ├── manifold
    │   └── README.txt
    ├── multioutput
    │   └── README.txt
    ├── linear_model
    │   ├── README.txt
    │   ├── plot_lasso_lars.py
    │   ├── plot_sgd_separating_hyperplane.py
    │   └── plot_sgd_loss_functions.py
    ├── neural_networks
    │   └── README.txt
    ├── model_selection
    │   ├── README.txt
    │   └── plot_cv_predict.py
    ├── text
    │   └── README.txt
    ├── calibration
    │   └── README.txt
    ├── feature_selection
    │   ├── README.txt
    │   └── plot_rfe_digits.py
    ├── cross_decomposition
    │   └── README.txt
    ├── kernel_approximation
    │   └── README.txt
    ├── release_highlights
    │   └── README.txt
    ├── semi_supervised
    │   └── README.txt
    ├── gaussian_process
    │   └── README.txt
    ├── applications
    │   └── README.txt
    └── compose
    │   └── README.txt
├── asv_benchmarks
    ├── benchmarks
    │   ├── __init__.py
    │   ├── svm.py
    │   ├── manifold.py
    │   └── neighbors.py
    └── .gitignore
├── .binder
    └── requirements.txt
├── .coveragerc
├── .github
    ├── labeler-file-extensions.yml
    ├── workflows
    │   ├── check-manifest.yml
    │   ├── unassign.yml
    │   ├── labeler-title-regex.yml
    │   ├── labeler-module.yml
    │   ├── assign.yml
    │   └── twitter.yml
    ├── ISSUE_TEMPLATE
    │   ├── doc_improvement.yml
    │   ├── feature_request.yml
    │   └── config.yml
    ├── FUNDING.yml
    ├── scripts
    │   └── label_title_regex.py
    └── PULL_REQUEST_TEMPLATE.md
├── lgtm.yml
├── conftest.py
├── SECURITY.md
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── .git-blame-ignore-revs
├── .codecov.yml
├── MANIFEST.in
├── pyproject.toml
├── maint_tools
    └── sort_whats_new.py
└── .gitignore


/sklearn/_loss/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/doc/testimonials/images/Makefile:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/_loss/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/cluster/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/compose/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/impute/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/metrics/_plot/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/metrics/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/mixture/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/svm/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/tree/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/utils/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/covariance/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/descr/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/images/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/ensemble/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/experimental/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/inspection/_plot/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/inspection/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/linear_model/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/manifold/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/neighbors/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/decomposition/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/externals/_packaging/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/feature_selection/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/gaussian_process/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/inspection/_plot/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/metrics/_plot/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/metrics/cluster/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/model_selection/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/neural_network/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/preprocessing/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/semi_supervised/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.circleci/artifact_path:
--------------------------------------------------------------------------------
1 | 0/doc/_changed.html
2 | 


--------------------------------------------------------------------------------
/sklearn/cross_decomposition/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/feature_extraction/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | /doc/whats_new/v*.rst merge=union
2 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_3/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_62/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40945/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_42585/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/ensemble/_hist_gradient_boosting/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn/linear_model/_glm/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # License: BSD 3 clause
2 | 


--------------------------------------------------------------------------------
/benchmarks/.gitignore:
--------------------------------------------------------------------------------
1 | /bhtsne
2 | *.npy
3 | *.json
4 | /mnist_tsne_output/
5 | 


--------------------------------------------------------------------------------
/doc/sphinxext/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include tests *.py
2 | include *.txt
3 | 


--------------------------------------------------------------------------------
/examples/README.txt:
--------------------------------------------------------------------------------
1 | .. _general_examples:
2 | 
3 | Examples
4 | ========
5 | 


--------------------------------------------------------------------------------
/sklearn/__check_build/_check_build.pyx:
--------------------------------------------------------------------------------
1 | def check_build():
2 |     return
3 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/svmlight_invalid_order.txt:
--------------------------------------------------------------------------------
1 | -1 5:2.5 2:-5.2 15:1.5
2 | 


--------------------------------------------------------------------------------
/asv_benchmarks/benchmarks/__init__.py:
--------------------------------------------------------------------------------
1 | """Benchmark suite for scikit-learn using ASV"""
2 | 


--------------------------------------------------------------------------------
/doc/images/axa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/axa.png


--------------------------------------------------------------------------------
/doc/images/bcg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/bcg.png


--------------------------------------------------------------------------------
/doc/images/bnp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/bnp.png


--------------------------------------------------------------------------------
/sklearn/externals/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | """
3 | External, bundled dependencies.
4 | 
5 | """
6 | 


--------------------------------------------------------------------------------
/doc/images/dysco.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/dysco.png


--------------------------------------------------------------------------------
/doc/images/intel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/intel.png


--------------------------------------------------------------------------------
/doc/images/iris.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/iris.pdf


--------------------------------------------------------------------------------
/doc/images/anaconda.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/anaconda.png


--------------------------------------------------------------------------------
/doc/images/cds-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/cds-logo.png


--------------------------------------------------------------------------------
/doc/images/columbia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/columbia.png


--------------------------------------------------------------------------------
/doc/images/dataiku.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/dataiku.png


--------------------------------------------------------------------------------
/doc/images/fujitsu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/fujitsu.png


--------------------------------------------------------------------------------
/doc/images/ml_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/ml_map.png


--------------------------------------------------------------------------------
/doc/images/no_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/no_image.png


--------------------------------------------------------------------------------
/doc/images/nvidia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/nvidia.png


--------------------------------------------------------------------------------
/doc/images/telecom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/telecom.png


--------------------------------------------------------------------------------
/doc/logos/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/favicon.ico


--------------------------------------------------------------------------------
/doc/logos/identity.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/identity.pdf


--------------------------------------------------------------------------------
/doc/images/axa-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/axa-small.png


--------------------------------------------------------------------------------
/doc/images/bcg-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/bcg-small.png


--------------------------------------------------------------------------------
/doc/images/bnp-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/bnp-small.png


--------------------------------------------------------------------------------
/doc/images/digicosme.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/digicosme.png


--------------------------------------------------------------------------------
/doc/images/inria-logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/inria-logo.jpg


--------------------------------------------------------------------------------
/doc/images/last_digit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/last_digit.png


--------------------------------------------------------------------------------
/doc/images/logo_APHP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/logo_APHP.png


--------------------------------------------------------------------------------
/doc/images/microsoft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/microsoft.png


--------------------------------------------------------------------------------
/doc/images/rbm_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/rbm_graph.png


--------------------------------------------------------------------------------
/doc/images/dataiku-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/dataiku-small.png


--------------------------------------------------------------------------------
/doc/images/fujitsu-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/fujitsu-small.png


--------------------------------------------------------------------------------
/doc/images/google-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/google-small.png


--------------------------------------------------------------------------------
/doc/images/inria-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/inria-small.png


--------------------------------------------------------------------------------
/doc/images/intel-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/intel-small.png


--------------------------------------------------------------------------------
/doc/images/nvidia-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/nvidia-small.png


--------------------------------------------------------------------------------
/doc/images/sloan_banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/sloan_banner.png


--------------------------------------------------------------------------------
/doc/images/telecom-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/telecom-small.png


--------------------------------------------------------------------------------
/doc/images/zalando_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/zalando_logo.png


--------------------------------------------------------------------------------
/examples/exercises/README.txt:
--------------------------------------------------------------------------------
1 | Tutorial exercises
2 | ------------------
3 | 
4 | Exercises for the tutorials
5 | 


--------------------------------------------------------------------------------
/asv_benchmarks/.gitignore:
--------------------------------------------------------------------------------
1 | *__pycache__*
2 | env/
3 | html/
4 | results/
5 | scikit-learn/
6 | benchmarks/cache/
7 | 


--------------------------------------------------------------------------------
/doc/images/columbia-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/columbia-small.png


--------------------------------------------------------------------------------
/doc/images/fnrs-logo-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/fnrs-logo-small.png


--------------------------------------------------------------------------------
/doc/images/lda_model_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/lda_model_graph.png


--------------------------------------------------------------------------------
/doc/images/logo_APHP_text.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/logo_APHP_text.png


--------------------------------------------------------------------------------
/doc/images/microsoft-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/microsoft-small.png


--------------------------------------------------------------------------------
/doc/images/multi_org_chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/multi_org_chart.png


--------------------------------------------------------------------------------
/doc/images/nyu_short_color.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/nyu_short_color.png


--------------------------------------------------------------------------------
/doc/images/quansight-labs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/quansight-labs.png


--------------------------------------------------------------------------------
/doc/images/sydney-primary.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/sydney-primary.jpeg


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/svmlight_invalid.txt:
--------------------------------------------------------------------------------
1 | python 2:2.5 10:-5.2 15:1.5
2 | 2.0 5:1.0 12:-3
3 | 3.0 20:27
4 | 


--------------------------------------------------------------------------------
/doc/images/sloan_logo-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/sloan_logo-small.png


--------------------------------------------------------------------------------
/doc/images/zalando_logo-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/zalando_logo-small.png


--------------------------------------------------------------------------------
/doc/logos/scikit-learn-logo.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/scikit-learn-logo.bmp


--------------------------------------------------------------------------------
/doc/logos/scikit-learn-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/scikit-learn-logo.png


--------------------------------------------------------------------------------
/doc/testimonials/images/inria.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/inria.png


--------------------------------------------------------------------------------
/doc/testimonials/images/mars.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/mars.png


--------------------------------------------------------------------------------
/doc/testimonials/images/yhat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/yhat.png


--------------------------------------------------------------------------------
/doc/testimonials/images/zopa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/zopa.png


--------------------------------------------------------------------------------
/sklearn/datasets/images/china.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/images/china.jpg


--------------------------------------------------------------------------------
/doc/images/grid_search_workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/grid_search_workflow.png


--------------------------------------------------------------------------------
/doc/images/quansight-labs-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/quansight-labs-small.png


--------------------------------------------------------------------------------
/doc/images/sydney-stacked-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/sydney-stacked-small.png


--------------------------------------------------------------------------------
/doc/testimonials/images/aweber.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/aweber.png


--------------------------------------------------------------------------------
/doc/testimonials/images/booking.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/booking.png


--------------------------------------------------------------------------------
/doc/testimonials/images/infonea.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/infonea.jpg


--------------------------------------------------------------------------------
/doc/testimonials/images/lovely.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/lovely.png


--------------------------------------------------------------------------------
/doc/testimonials/images/okcupid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/okcupid.png


--------------------------------------------------------------------------------
/doc/testimonials/images/phimeca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/phimeca.png


--------------------------------------------------------------------------------
/doc/testimonials/images/spotify.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/spotify.png


--------------------------------------------------------------------------------
/sklearn/datasets/data/digits.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/data/digits.csv.gz


--------------------------------------------------------------------------------
/sklearn/datasets/images/flower.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/images/flower.jpg


--------------------------------------------------------------------------------
/doc/images/plot_face_recognition_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/plot_face_recognition_1.png


--------------------------------------------------------------------------------
/doc/images/plot_face_recognition_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/plot_face_recognition_2.png


--------------------------------------------------------------------------------
/doc/images/scikit-learn-logo-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/scikit-learn-logo-small.png


--------------------------------------------------------------------------------
/doc/logos/scikit-learn-logo-notext.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/scikit-learn-logo-notext.png


--------------------------------------------------------------------------------
/doc/logos/scikit-learn-logo-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/scikit-learn-logo-small.png


--------------------------------------------------------------------------------
/doc/logos/scikit-learn-logo-thumb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/logos/scikit-learn-logo-thumb.png


--------------------------------------------------------------------------------
/doc/testimonials/images/betaworks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/betaworks.png


--------------------------------------------------------------------------------
/doc/testimonials/images/birchbox.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/birchbox.jpg


--------------------------------------------------------------------------------
/doc/testimonials/images/datarobot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/datarobot.png


--------------------------------------------------------------------------------
/doc/testimonials/images/evernote.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/evernote.png


--------------------------------------------------------------------------------
/doc/testimonials/images/howaboutwe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/howaboutwe.png


--------------------------------------------------------------------------------
/doc/testimonials/images/jpmorgan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/jpmorgan.png


--------------------------------------------------------------------------------
/doc/testimonials/images/machinalis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/machinalis.png


--------------------------------------------------------------------------------
/doc/testimonials/images/peerindex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/peerindex.png


--------------------------------------------------------------------------------
/doc/testimonials/images/rangespan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/rangespan.png


--------------------------------------------------------------------------------
/doc/images/scikit-learn-logo-notext.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/scikit-learn-logo-notext.png


--------------------------------------------------------------------------------
/doc/testimonials/images/change-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/change-logo.png


--------------------------------------------------------------------------------
/doc/testimonials/images/dataiku_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/dataiku_logo.png


--------------------------------------------------------------------------------
/doc/testimonials/images/datapublica.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/datapublica.png


--------------------------------------------------------------------------------
/doc/testimonials/images/huggingface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/huggingface.png


--------------------------------------------------------------------------------
/doc/testimonials/images/solido_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/solido_logo.png


--------------------------------------------------------------------------------
/doc/images/grid_search_cross_validation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/grid_search_cross_validation.png


--------------------------------------------------------------------------------
/doc/images/multilayerperceptron_network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/multilayerperceptron_network.png


--------------------------------------------------------------------------------
/doc/images/plot_digits_classification.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/plot_digits_classification.png


--------------------------------------------------------------------------------
/doc/images/png-logo-inria-la-fondation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/images/png-logo-inria-la-fondation.png


--------------------------------------------------------------------------------
/doc/testimonials/images/ottogroup_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/ottogroup_logo.png


--------------------------------------------------------------------------------
/sklearn/datasets/data/diabetes_data.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/data/diabetes_data.csv.gz


--------------------------------------------------------------------------------
/doc/testimonials/images/bestofmedia-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/bestofmedia-logo.png


--------------------------------------------------------------------------------
/doc/testimonials/images/telecomparistech.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/telecomparistech.jpg


--------------------------------------------------------------------------------
/sklearn/datasets/data/diabetes_target.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/data/diabetes_target.csv.gz


--------------------------------------------------------------------------------
/doc/testimonials/images/bnp_paribas_cardif.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/testimonials/images/bnp_paribas_cardif.png


--------------------------------------------------------------------------------
/examples/cluster/README.txt:
--------------------------------------------------------------------------------
1 | .. _cluster_examples:
2 | 
3 | Clustering
4 | ----------
5 | 
6 | Examples concerning the :mod:`sklearn.cluster` module.
7 | 


--------------------------------------------------------------------------------
/examples/tree/README.txt:
--------------------------------------------------------------------------------
1 | .. _tree_examples:
2 | 
3 | Decision Trees
4 | --------------
5 | 
6 | Examples concerning the :mod:`sklearn.tree` module.
7 | 


--------------------------------------------------------------------------------
/doc/modules/glm_data/lasso_enet_coordinate_descent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/modules/glm_data/lasso_enet_coordinate_descent.png


--------------------------------------------------------------------------------
/examples/ensemble/README.txt:
--------------------------------------------------------------------------------
1 | .. _ensemble_examples:
2 | 
3 | Ensemble methods
4 | ----------------
5 | 
6 | Examples concerning the :mod:`sklearn.ensemble` module.
7 | 


--------------------------------------------------------------------------------
/examples/inspection/README.txt:
--------------------------------------------------------------------------------
1 | .. _inspection_examples:
2 | 
3 | Inspection
4 | ----------
5 | 
6 | Examples related to the :mod:`sklearn.inspection` module.
7 | 
8 | 


--------------------------------------------------------------------------------
/examples/svm/README.txt:
--------------------------------------------------------------------------------
1 | .. _svm_examples:
2 | 
3 | Support Vector Machines
4 | -----------------------
5 | 
6 | Examples concerning the :mod:`sklearn.svm` module.
7 | 


--------------------------------------------------------------------------------
/examples/bicluster/README.txt:
--------------------------------------------------------------------------------
1 | .. _bicluster_examples:
2 | 
3 | Biclustering
4 | ------------
5 | 
6 | Examples concerning biclustering techniques in the :mod:`sklearn.cluster` module.
7 | 


--------------------------------------------------------------------------------
/examples/datasets/README.txt:
--------------------------------------------------------------------------------
1 | .. _dataset_examples:
2 | 
3 | Dataset examples
4 | -----------------------
5 | 
6 | Examples concerning the :mod:`sklearn.datasets` module.
7 | 


--------------------------------------------------------------------------------
/sklearn/cross_decomposition/__init__.py:
--------------------------------------------------------------------------------
1 | from ._pls import PLSCanonical, PLSRegression, PLSSVD, CCA
2 | 
3 | __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"]
4 | 


--------------------------------------------------------------------------------
/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png


--------------------------------------------------------------------------------
/examples/classification/README.txt:
--------------------------------------------------------------------------------
1 | .. _classification_examples:
2 | 
3 | Classification
4 | -----------------------
5 | 
6 | General examples about classification algorithms.
7 | 


--------------------------------------------------------------------------------
/examples/impute/README.txt:
--------------------------------------------------------------------------------
1 | .. _impute_examples:
2 | 
3 | Missing Value Imputation
4 | ------------------------
5 | 
6 | Examples concerning the :mod:`sklearn.impute` module.
7 | 


--------------------------------------------------------------------------------
/examples/miscellaneous/README.txt:
--------------------------------------------------------------------------------
1 | .. _miscellaneous_examples:
2 | 
3 | Miscellaneous
4 | -------------
5 | 
6 | Miscellaneous and introductory examples for scikit-learn.
7 | 
8 | 


--------------------------------------------------------------------------------
/examples/mixture/README.txt:
--------------------------------------------------------------------------------
1 | .. _mixture_examples:
2 | 
3 | Gaussian Mixture Models
4 | -----------------------
5 | 
6 | Examples concerning the :mod:`sklearn.mixture` module.
7 | 


--------------------------------------------------------------------------------
/examples/neighbors/README.txt:
--------------------------------------------------------------------------------
1 | .. _neighbors_examples:
2 | 
3 | Nearest Neighbors
4 | -----------------------
5 | 
6 | Examples concerning the :mod:`sklearn.neighbors` module.
7 | 


--------------------------------------------------------------------------------
/examples/preprocessing/README.txt:
--------------------------------------------------------------------------------
1 | .. _preprocessing_examples:
2 | 
3 | Preprocessing
4 | -------------
5 | 
6 | Examples concerning the :mod:`sklearn.preprocessing` module.
7 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1/api-v1-jd-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1/api-v1-jd-1.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1/api-v1-jdf-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1/api-v1-jdf-1.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1/api-v1-jdq-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1/api-v1-jdq-1.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1/data-v1-dl-1.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1/data-v1-dl-1.arff.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/api-v1-jd-2.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/api-v1-jd-2.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/api-v1-jdf-2.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/api-v1-jdf-2.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/api-v1-jdq-2.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/api-v1-jdq-2.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_3/api-v1-jd-3.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_3/api-v1-jd-3.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_3/api-v1-jdf-3.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_3/api-v1-jdf-3.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_3/api-v1-jdq-3.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_3/api-v1-jdq-3.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_3/data-v1-dl-3.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_3/data-v1-dl-3.arff.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/svmlight_multilabel.txt:
--------------------------------------------------------------------------------
1 | # multilabel dataset in SVMlight format
2 | 1,0 2:2.5   10:-5.2 15:1.5
3 | 2 5:1.0 12:-3 
4 |  2:3.5 11:26
5 | 1,2 20:27
6 | 


--------------------------------------------------------------------------------
/examples/covariance/README.txt:
--------------------------------------------------------------------------------
1 | .. _covariance_examples:
2 | 
3 | Covariance estimation
4 | ---------------------
5 | 
6 | Examples concerning the :mod:`sklearn.covariance` module.
7 | 


--------------------------------------------------------------------------------
/examples/decomposition/README.txt:
--------------------------------------------------------------------------------
1 | .. _decomposition_examples:
2 | 
3 | Decomposition
4 | -------------
5 | 
6 | Examples concerning the :mod:`sklearn.decomposition` module.
7 | 
8 | 


--------------------------------------------------------------------------------
/examples/manifold/README.txt:
--------------------------------------------------------------------------------
1 | .. _manifold_examples:
2 | 
3 | Manifold learning
4 | -----------------------
5 | 
6 | Examples concerning the :mod:`sklearn.manifold` module.
7 | 
8 | 


--------------------------------------------------------------------------------
/examples/multioutput/README.txt:
--------------------------------------------------------------------------------
1 | .. _multioutput_examples:
2 | 
3 | Multioutput methods
4 | -------------------
5 | 
6 | Examples concerning the :mod:`sklearn.multioutput` module.
7 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-292.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-292.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/api-v1-jd-561.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/api-v1-jd-561.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/api-v1-jd-61.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/api-v1-jd-61.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/api-v1-jdf-61.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/api-v1-jdf-61.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/api-v1-jdq-61.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/api-v1-jdq-61.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/data-v1-dl-61.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/data-v1-dl-61.arff.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_62/api-v1-jd-62.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_62/api-v1-jd-62.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_62/api-v1-jdf-62.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_62/api-v1-jdf-62.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_62/api-v1-jdq-62.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_62/api-v1-jdq-62.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/api-v1-jd-1119.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/api-v1-jd-1119.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-40981.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-40981.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-292.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-292.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/api-v1-jdf-561.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/api-v1-jdf-561.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/api-v1-jdq-561.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/api-v1-jdq-561.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_62/data-v1-dl-52352.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_62/data-v1-dl-52352.arff.gz


--------------------------------------------------------------------------------
/examples/linear_model/README.txt:
--------------------------------------------------------------------------------
1 | .. _linear_examples:
2 | 
3 | Generalized Linear Models
4 | -------------------------
5 | 
6 | Examples concerning the :mod:`sklearn.linear_model` module.
7 | 


--------------------------------------------------------------------------------
/examples/neural_networks/README.txt:
--------------------------------------------------------------------------------
1 | .. _neural_network_examples:
2 | 
3 | Neural Networks
4 | -----------------------
5 | 
6 | Examples concerning the :mod:`sklearn.neural_network` module.
7 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdf-1119.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdf-1119.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdq-1119.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdq-1119.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/data-v1-dl-54002.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/data-v1-dl-54002.arff.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/data-v1-dl-1666876.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/data-v1-dl-1666876.arff.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-40981.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-40981.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/data-v1-dl-49822.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/data-v1-dl-49822.arff.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/api-v1-jd-40589.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/api-v1-jd-40589.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdf-40589.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdf-40589.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdq-40589.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdq-40589.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jd-40675.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jd-40675.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdf-40675.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdf-40675.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdq-40675.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdq-40675.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40945/api-v1-jd-40945.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40945/api-v1-jd-40945.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdf-40945.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdf-40945.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdq-40945.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdq-40945.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/api-v1-jd-40966.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/api-v1-jd-40966.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdf-40966.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdf-40966.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdq-40966.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdq-40966.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_42585/api-v1-jd-42585.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_42585/api-v1-jd-42585.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdf-42585.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdf-42585.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdq-42585.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdq-42585.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/data-v1-dl-52739.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/data-v1-dl-52739.arff.gz


--------------------------------------------------------------------------------
/examples/model_selection/README.txt:
--------------------------------------------------------------------------------
1 | .. _model_selection_examples:
2 | 
3 | Model Selection
4 | -----------------------
5 | 
6 | Examples related to the :mod:`sklearn.model_selection` module.
7 | 


--------------------------------------------------------------------------------
/examples/text/README.txt:
--------------------------------------------------------------------------------
1 | .. _text_examples:
2 | 
3 | Working with text documents
4 | ----------------------------
5 | 
6 | Examples concerning the :mod:`sklearn.feature_extraction.text` module.
7 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/data-v1-dl-4644182.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/data-v1-dl-4644182.arff.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/data-v1-dl-4965250.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/data-v1-dl-4965250.arff.gz


--------------------------------------------------------------------------------
/.binder/requirements.txt:
--------------------------------------------------------------------------------
1 | --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
2 | --pre
3 | matplotlib
4 | scikit-image
5 | pandas
6 | sphinx-gallery
7 | scikit-learn
8 | 
9 | 


--------------------------------------------------------------------------------
/doc/themes/scikit-learn-modern/theme.conf:
--------------------------------------------------------------------------------
1 | [theme]
2 | inherit = basic
3 | pygments_style = default
4 | stylesheet = css/theme.css
5 | 
6 | [options]
7 | google_analytics = true
8 | mathjax_path =
9 | 


--------------------------------------------------------------------------------
/examples/calibration/README.txt:
--------------------------------------------------------------------------------
1 | .. _calibration_examples:
2 | 
3 | Calibration
4 | -----------------------
5 | 
6 | Examples illustrating the calibration of predicted probabilities of classifiers.
7 | 


--------------------------------------------------------------------------------
/examples/feature_selection/README.txt:
--------------------------------------------------------------------------------
1 | .. _feature_selection_examples:
2 | 
3 | Feature Selection
4 | -----------------------
5 | 
6 | Examples concerning the :mod:`sklearn.feature_selection` module.
7 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40945/data-v1-dl-16826755.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40945/data-v1-dl-16826755.arff.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/data-v1-dl-17928620.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/data-v1-dl-17928620.arff.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_42585/data-v1-dl-21854866.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_42585/data-v1-dl-21854866.arff.gz


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | branch = True
 3 | source = sklearn
 4 | parallel = True
 5 | omit =
 6 |     */sklearn/externals/*
 7 |     */sklearn/_build_utils/*
 8 |     */benchmarks/*
 9 |     **/setup.py
10 | 


--------------------------------------------------------------------------------
/doc/tutorial/common_includes/info.txt:
--------------------------------------------------------------------------------
1 | This directory is meant to share common RST file snippets that we want to
2 | reuse by inclusion in the real tutorial, in order to lower the maintenance
3 | burden of redundant sections.
4 | 


--------------------------------------------------------------------------------
/examples/cross_decomposition/README.txt:
--------------------------------------------------------------------------------
1 | .. _cross_decomposition_examples:
2 | 
3 | Cross decomposition
4 | -------------------
5 | 
6 | Examples concerning the :mod:`sklearn.cross_decomposition` module.
7 | 
8 | 


--------------------------------------------------------------------------------
/examples/kernel_approximation/README.txt:
--------------------------------------------------------------------------------
1 | .. _kernel_approximation_examples:
2 | 
3 | Kernel Approximation
4 | --------------------
5 | 
6 | Examples concerning the :mod:`sklearn.kernel_approximation` module.
7 | 


--------------------------------------------------------------------------------
/examples/release_highlights/README.txt:
--------------------------------------------------------------------------------
1 | .. _release_highlights_examples:
2 | 
3 | Release Highlights
4 | ------------------
5 | 
6 | These examples illustrate the main features of the releases of scikit-learn.
7 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-dv-1.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-dv-1.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-s-act-.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-dv-1.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-s-act-.json.gz


--------------------------------------------------------------------------------
/.github/labeler-file-extensions.yml:
--------------------------------------------------------------------------------
1 | cython:
2 | - sklearn/**/*.pyx
3 | - sklearn/**/*.pxd
4 | - sklearn/**/*.pxi
5 | # Tempita templates
6 | - sklearn/**/*.pyx.tp
7 | - sklearn/**/*.pxd.tp
8 | - sklearn/**/*.pxi.tp
9 | 


--------------------------------------------------------------------------------
/examples/semi_supervised/README.txt:
--------------------------------------------------------------------------------
1 | .. _semi_supervised_examples:
2 | 
3 | Semi Supervised Classification
4 | ------------------------------
5 | 
6 | Examples concerning the :mod:`sklearn.semi_supervised` module.
7 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-s-act-.json.gz


--------------------------------------------------------------------------------
/doc/templates/generate_deprecated.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | for f in [^d]*; do (head -n2 < $f; echo '
3 | .. meta::
4 |    :robots: noindex
5 | 
6 | .. warning::
7 |    **DEPRECATED**
8 | '; tail -n+3 $f) > deprecated_$f; done
9 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-dv-3.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-dv-3.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-s-act-.json.gz


--------------------------------------------------------------------------------
/lgtm.yml:
--------------------------------------------------------------------------------
1 | extraction:
2 |   cpp:
3 |     before_index:
4 |       - pip3 install numpy==1.16.3
5 |       - pip3 install --no-deps scipy Cython
6 |     index:
7 |       build_command:
8 |         - python3 setup.py build_ext -i
9 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-s-act-.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-s-act-.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz


--------------------------------------------------------------------------------
/examples/gaussian_process/README.txt:
--------------------------------------------------------------------------------
1 | .. _gaussian_process_examples:
2 | 
3 | Gaussian Process for Machine Learning
4 | -------------------------------------
5 | 
6 | Examples concerning the :mod:`sklearn.gaussian_process` module.
7 | 
8 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/scikit-learn/HEAD/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz


--------------------------------------------------------------------------------
/sklearn/ensemble/_hist_gradient_boosting/__init__.py:
--------------------------------------------------------------------------------
1 | """This module implements histogram-based gradient boosting estimators.
2 | 
3 | The implementation is a port of pygbm, which is itself strongly inspired
4 | by LightGBM.
5 | """
6 | 


--------------------------------------------------------------------------------
/sklearn/svm/src/libsvm/libsvm_template.cpp:
--------------------------------------------------------------------------------
1 | 
2 | /* this is a hack to generate libsvm with both sparse and dense
3 |    methods in the same binary*/
4 | 
5 | #define _DENSE_REP
6 | #include "svm.cpp"
7 | #undef _DENSE_REP
8 | #include "svm.cpp"
9 | 


--------------------------------------------------------------------------------
/examples/applications/README.txt:
--------------------------------------------------------------------------------
1 | .. _realworld_examples:
2 | 
3 | Examples based on real world datasets
4 | -------------------------------------
5 | 
6 | Applications to real-world problems with some medium-sized datasets or
7 | an interactive user interface.
8 | 


--------------------------------------------------------------------------------
/examples/compose/README.txt:
--------------------------------------------------------------------------------
1 | .. _compose_examples:
2 | 
3 | Pipelines and composite estimators
4 | ----------------------------------
5 | 
6 | Examples of how to compose transformers and pipelines from other estimators. See the :ref:`User Guide <combining_estimators>`.
7 | 


--------------------------------------------------------------------------------
/doc/README.md:
--------------------------------------------------------------------------------
1 | # Documentation for scikit-learn
2 | 
3 | This directory contains the full manual and website as displayed at
4 | http://scikit-learn.org. See
5 | http://scikit-learn.org/dev/developers/contributing.html#documentation for
6 | detailed information about the documentation. 
7 | 


--------------------------------------------------------------------------------
/doc/modules/pipeline.rst:
--------------------------------------------------------------------------------
 1 | :orphan:
 2 | 
 3 | .. raw:: html
 4 | 
 5 |     <meta http-equiv="refresh" content="1; url=./compose.html" />
 6 |     <script>
 7 |       window.location.href = "./compose.html";
 8 |     </script>
 9 | 
10 | This content is now at :ref:`combining_estimators`.
11 | 


--------------------------------------------------------------------------------
/sklearn/utils/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import sklearn
 4 | 
 5 | 
 6 | @pytest.fixture
 7 | def print_changed_only_false():
 8 |     """Run the requesting test with ``print_changed_only=False``.
 9 | 
10 |     The option is switched off before the test body runs and set back to
11 |     ``True`` once the fixture resumes after the ``yield``.
12 |     """
13 |     sklearn.set_config(print_changed_only=False)
14 |     yield
15 |     sklearn.set_config(print_changed_only=True)  # reset to default
16 | 


--------------------------------------------------------------------------------
/doc/testimonials/README.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | To find the list of people we contacted, see:
4 | https://docs.google.com/spreadsheet/ccc?key=0AhGnAxuBDhjmdDYwNzlZVE5SMkFsMjNBbGlaWkpNZ1E&usp=sharing
5 | 
6 | To obtain access to this file, send an email to:
7 | nelle dot varoquaux at gmail dot com
8 | 
9 | 


--------------------------------------------------------------------------------
/doc/binder/requirements.txt:
--------------------------------------------------------------------------------
1 | # A binder requirement file is required by sphinx-gallery.
2 | # We don't really need one since our binder requirement file lives in the
3 | # .binder directory.
4 | # This file can be removed if 'dependencies' is made an optional key for
5 | # binder in sphinx-gallery.
6 | 


--------------------------------------------------------------------------------
/doc/templates/class.rst:
--------------------------------------------------------------------------------
 1 | :mod:`{{module}}`.{{objname}}
 2 | {{ underline }}==============
 3 | 
 4 | .. currentmodule:: {{ module }}
 5 | 
 6 | .. autoclass:: {{ objname }}
 7 | 
 8 | .. include:: {{module}}.{{objname}}.examples
 9 | 
10 | .. raw:: html
11 | 
12 |     <div class="clearer"></div>
13 | 


--------------------------------------------------------------------------------
/sklearn/svm/src/libsvm/_svm_cython_blas_helpers.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SVM_CYTHON_BLAS_HELPERS_H
 2 | #define _SVM_CYTHON_BLAS_HELPERS_H
 3 | 
 4 | typedef double (*dot_func)(int, double*, int, double*, int);
 5 | typedef struct BlasFunctions{
 6 |     dot_func dot;
 7 | } BlasFunctions;
 8 | 
 9 | #endif
10 | 


--------------------------------------------------------------------------------
/doc/templates/function.rst:
--------------------------------------------------------------------------------
 1 | :mod:`{{module}}`.{{objname}}
 2 | {{ underline }}====================
 3 | 
 4 | .. currentmodule:: {{ module }}
 5 | 
 6 | .. autofunction:: {{ objname }}
 7 | 
 8 | .. include:: {{module}}.{{objname}}.examples
 9 | 
10 | .. raw:: html
11 | 
12 |     <div class="clearer"></div>
13 | 


--------------------------------------------------------------------------------
/sklearn/experimental/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn.experimental` module provides importable modules that enable
3 | the use of experimental features or estimators.
4 | 
5 | The features and estimators that are experimental aren't subject to
6 | deprecation cycles. Use them at your own risk!
7 | """
8 | 


--------------------------------------------------------------------------------
/sklearn/mixture/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`sklearn.mixture` module implements mixture modeling algorithms.
 3 | """
 4 | 
 5 | from ._gaussian_mixture import GaussianMixture
 6 | from ._bayesian_mixture import BayesianGaussianMixture
 7 | 
 8 | 
 9 | __all__ = ["GaussianMixture", "BayesianGaussianMixture"]
10 | 


--------------------------------------------------------------------------------
/sklearn/externals/README:
--------------------------------------------------------------------------------
1 | This directory contains bundled external dependencies that are updated
2 | every once in a while.
3 | 
4 | Note for distribution packagers: if you want to remove the duplicated
5 | code and depend on a packaged version, we suggest that you simply do a
6 | symbolic link in this directory.
7 | 
8 | 


--------------------------------------------------------------------------------
/doc/templates/numpydoc_docstring.rst:
--------------------------------------------------------------------------------
 1 | {{index}}
 2 | {{summary}}
 3 | {{extended_summary}}
 4 | {{parameters}}
 5 | {{returns}}
 6 | {{yields}}
 7 | {{other_parameters}}
 8 | {{attributes}}
 9 | {{raises}}
10 | {{warns}}
11 | {{warnings}}
12 | {{see_also}}
13 | {{notes}}
14 | {{references}}
15 | {{examples}}
16 | {{methods}}
17 | 


--------------------------------------------------------------------------------
/sklearn/neighbors/_partition_nodes.pxd:
--------------------------------------------------------------------------------
 1 | from ..utils._typedefs cimport DTYPE_t, ITYPE_t
 2 | 
 3 | cdef int partition_node_indices(
 4 |         DTYPE_t *data,
 5 |         ITYPE_t *node_indices,
 6 |         ITYPE_t split_dim,
 7 |         ITYPE_t split_index,
 8 |         ITYPE_t n_features,
 9 |         ITYPE_t n_points) except -1
10 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/svmlight_classification.txt:
--------------------------------------------------------------------------------
 1 | # comment
 2 | # note: the next line contains a tab
 3 | 1.0 3:2.5 	   11:-5.2 16:1.5 # and an inline comment
 4 | 2.0 6:1.0 13:-3 
 5 | # another comment
 6 | 3.0 21:27
 7 | 4.0 2:1.234567890123456e10 # double precision value
 8 | 1.0     # empty line, all zeros
 9 | 2.0 3:0 # explicit zeros
10 | 


--------------------------------------------------------------------------------
/sklearn/datasets/data/linnerud_exercise.csv:
--------------------------------------------------------------------------------
 1 | Chins Situps Jumps
 2 | 5 162 60
 3 | 2 110 60
 4 | 12 101 101
 5 | 12 105 37
 6 | 13 155 58
 7 | 4 101 42
 8 | 8 101 38
 9 | 6 125 40
10 | 15 200 40
11 | 17 251 250
12 | 17 120 38
13 | 13 210 115
14 | 14 215 105
15 | 1 50 50
16 | 6 70 31
17 | 12 210 120
18 | 4 60 25
19 | 11 230 80
20 | 15 225 73
21 | 2 110 43
22 | 


--------------------------------------------------------------------------------
/sklearn/externals/conftest.py:
--------------------------------------------------------------------------------
1 | # Do not collect any tests in externals. This is more robust than using
2 | # --ignore because --ignore needs a path and it is not convenient to pass in
3 | # the externals path (very long install-dependent path in site-packages) when
4 | # using --pyargs
5 | def pytest_ignore_collect(path, config):
6 |     """Tell pytest to ignore every path it asks about in this directory.
7 | 
8 |     Both arguments are unused: the answer is always ``True``.
9 |     """
10 |     return True
11 | 
12 | 


--------------------------------------------------------------------------------
/sklearn/datasets/data/linnerud_physiological.csv:
--------------------------------------------------------------------------------
 1 | Weight Waist Pulse
 2 | 191 36 50
 3 | 189 37 52
 4 | 193 38 58
 5 | 162 35 62
 6 | 189 35 46
 7 | 182 36 56
 8 | 211 38 56
 9 | 167 34 60
10 | 176 31 74
11 | 154 33 56
12 | 169 34 50
13 | 166 33 52
14 | 154 34 64
15 | 247 46 50
16 | 193 36 46
17 | 202 37 62
18 | 176 37 54
19 | 157 32 52
20 | 156 33 54
21 | 138 33 68
22 | 


--------------------------------------------------------------------------------
/sklearn/linear_model/_glm/__init__.py:
--------------------------------------------------------------------------------
 1 | # License: BSD 3 clause
 2 | 
 3 | from .glm import (
 4 |     GeneralizedLinearRegressor,
 5 |     PoissonRegressor,
 6 |     GammaRegressor,
 7 |     TweedieRegressor,
 8 | )
 9 | 
10 | __all__ = [
11 |     "GeneralizedLinearRegressor",
12 |     "PoissonRegressor",
13 |     "GammaRegressor",
14 |     "TweedieRegressor",
15 | ]
16 | 


--------------------------------------------------------------------------------
/sklearn/tests/test_check_build.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Smoke Test the check_build module
 3 | """
 4 | 
 5 | # Author: G Varoquaux
 6 | # License: BSD 3 clause
 7 | 
 8 | import pytest
 9 | 
10 | from sklearn.__check_build import raise_build_error
11 | 
12 | 
13 | def test_raise_build_error():
14 |     """Check that ``raise_build_error`` raises ImportError for an ImportError."""
15 |     with pytest.raises(ImportError):
16 |         raise_build_error(ImportError())
17 | 


--------------------------------------------------------------------------------
/sklearn/svm/_newrand.pyx:
--------------------------------------------------------------------------------
 1 | """
 2 | Wrapper for newrand.h
 3 | 
 4 | """
 5 | 
 6 | cdef extern from "newrand.h":
 7 | 	void set_seed(unsigned int)
 8 | 	unsigned int bounded_rand_int(unsigned int)
 9 | 
10 | # Python-callable wrapper that forwards the seed to set_seed from newrand.h.
11 | def set_seed_wrap(unsigned int custom_seed):
12 | 	set_seed(custom_seed)
13 | 
14 | # Python-callable wrapper that returns the result of bounded_rand_int
15 | # from newrand.h for the given range_.
16 | def bounded_rand_int_wrap(unsigned int range_):
17 | 	return bounded_rand_int(range_)
18 | 


--------------------------------------------------------------------------------
/doc/templates/class_with_call.rst:
--------------------------------------------------------------------------------
 1 | :mod:`{{module}}`.{{objname}}
 2 | {{ underline }}===============
 3 | 
 4 | .. currentmodule:: {{ module }}
 5 | 
 6 | .. autoclass:: {{ objname }}
 7 | 
 8 |    {% block methods %}
 9 |    .. automethod:: __call__
10 |    {% endblock %}
11 | 
12 | .. include:: {{module}}.{{objname}}.examples
13 | 
14 | .. raw:: html
15 | 
16 |     <div class="clearer"></div>
17 | 


--------------------------------------------------------------------------------
/doc/computing.rst:
--------------------------------------------------------------------------------
 1 | .. Places parent toc into the sidebar
 2 | 
 3 | :parenttoc: True
 4 | 
 5 | ============================
 6 | Computing with scikit-learn
 7 | ============================
 8 | 
 9 | .. include:: includes/big_toc_css.rst
10 | 
11 | .. toctree::
12 |     :maxdepth: 2
13 | 
14 |     computing/scaling_strategies
15 |     computing/computational_performance
16 |     computing/parallelism
17 | 


--------------------------------------------------------------------------------
/sklearn/neural_network/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`sklearn.neural_network` module includes models based on neural
 3 | networks.
 4 | """
 5 | 
 6 | # License: BSD 3 clause
 7 | 
 8 | from ._rbm import BernoulliRBM
 9 | 
10 | from ._multilayer_perceptron import MLPClassifier
11 | from ._multilayer_perceptron import MLPRegressor
12 | 
13 | __all__ = ["BernoulliRBM", "MLPClassifier", "MLPRegressor"]
14 | 


--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
1 | # Even if empty this file is useful so that when running from the root folder
2 | # ./sklearn is added to sys.path by pytest. See
3 | # https://docs.pytest.org/en/latest/explanation/pythonpath.html for more
4 | # details. For example, this lets you build extensions in place and run pytest
5 | # doc/modules/clustering.rst while using sklearn from the local folder rather than
6 | # the one from site-packages.
7 | 


--------------------------------------------------------------------------------
/doc/templates/deprecated_function.rst:
--------------------------------------------------------------------------------
 1 | :mod:`{{module}}`.{{objname}}
 2 | {{ underline }}====================
 3 | 
 4 | .. meta::
 5 |    :robots: noindex
 6 | 
 7 | .. warning::
 8 |    **DEPRECATED**
 9 | 
10 | 
11 | .. currentmodule:: {{ module }}
12 | 
13 | .. autofunction:: {{ objname }}
14 | 
15 | .. include:: {{module}}.{{objname}}.examples
16 | 
17 | .. raw:: html
18 | 
19 |     <div class="clearer"></div>
20 | 


--------------------------------------------------------------------------------
/doc/templates/deprecated_class_without_init.rst:
--------------------------------------------------------------------------------
 1 | :mod:`{{module}}`.{{objname}}
 2 | {{ underline }}==============
 3 | 
 4 | .. meta::
 5 |    :robots: noindex
 6 | 
 7 | .. warning::
 8 |    **DEPRECATED**
 9 | 
10 | 
11 | .. currentmodule:: {{ module }}
12 | 
13 | .. autoclass:: {{ objname }}
14 | 
15 | .. include:: {{module}}.{{objname}}.examples
16 | 
17 | .. raw:: html
18 | 
19 |     <div class="clearer"></div>
20 | 


--------------------------------------------------------------------------------
/sklearn/_distributor_init.py:
--------------------------------------------------------------------------------
 1 | """ Distributor init file
 2 | 
 3 | Distributors: you can add custom code here to support particular distributions
 4 | of scikit-learn.
 5 | 
 6 | For example, this is a good place to put any checks for hardware requirements.
 7 | 
 8 | The scikit-learn standard source distribution will not put code in this file,
 9 | so you can safely replace this file with your own version.
10 | """
11 | 


--------------------------------------------------------------------------------
/doc/model_selection.rst:
--------------------------------------------------------------------------------
 1 | .. Places parent toc into the sidebar
 2 | 
 3 | :parenttoc: True
 4 | 
 5 | .. include:: includes/big_toc_css.rst
 6 | 
 7 | .. _model_selection:
 8 | 
 9 | Model selection and evaluation
10 | ------------------------------
11 | 
12 | .. toctree::
13 |     :maxdepth: 2
14 | 
15 |     modules/cross_validation
16 |     modules/grid_search
17 |     modules/model_evaluation
18 |     modules/learning_curve
19 | 


--------------------------------------------------------------------------------
/doc/developers/index.rst:
--------------------------------------------------------------------------------
 1 | .. Places parent toc into the sidebar
 2 | 
 3 | :parenttoc: True
 4 | 
 5 | .. _developers_guide:
 6 | 
 7 | =================
 8 | Developer's Guide
 9 | =================
10 | 
11 | .. include:: ../includes/big_toc_css.rst
12 | .. include:: ../tune_toc.rst
13 | 
14 | .. toctree::
15 | 
16 |    contributing
17 |    develop
18 |    tips
19 |    utilities
20 |    performance
21 |    advanced_installation
22 |    bug_triaging
23 |    maintainer
24 |    plotting
25 | 


--------------------------------------------------------------------------------
/doc/templates/deprecated_class.rst:
--------------------------------------------------------------------------------
 1 | :mod:`{{module}}`.{{objname}}
 2 | {{ underline }}==============
 3 | 
 4 | .. meta::
 5 |    :robots: noindex
 6 | 
 7 | .. warning::
 8 |    **DEPRECATED**
 9 | 
10 | 
11 | .. currentmodule:: {{ module }}
12 | 
13 | .. autoclass:: {{ objname }}
14 | 
15 |    {% block methods %}
16 |    .. automethod:: __init__
17 |    {% endblock %}
18 | 
19 | .. include:: {{module}}.{{objname}}.examples
20 | 
21 | .. raw:: html
22 | 
23 |     <div class="clearer"></div>
24 | 


--------------------------------------------------------------------------------
/doc/themes/scikit-learn-modern/search.html:
--------------------------------------------------------------------------------
1 | {%- extends "basic/search.html" %}
2 | {% block extrahead %}
3 |   <script type="text/javascript" src="{{ pathto('searchindex.js', 1) }}" defer></script>
4 |   <script src="{{ pathto('_static/underscore.js', 1) }}"></script>
5 |   <script src="{{ pathto('_static/doctools.js', 1) }}"></script>
6 |   <script src="{{ pathto('_static/language_data.js', 1) }}"></script>
7 |   <script src="{{ pathto('_static/js/searchtools.js', 1) }}"></script>
8 | {% endblock %}
9 | 


--------------------------------------------------------------------------------
/sklearn/impute/__init__.py:
--------------------------------------------------------------------------------
 1 | """Transformers for missing value imputation"""
 2 | import typing
 3 | 
 4 | from ._base import MissingIndicator, SimpleImputer
 5 | from ._knn import KNNImputer
 6 | 
 7 | if typing.TYPE_CHECKING:
 8 |     # Avoid errors in type checkers (e.g. mypy) for experimental estimators.
 9 |     # TODO: remove this check once the estimator is no longer experimental.
10 |     from ._iterative import IterativeImputer  # noqa
11 | 
12 | __all__ = ["MissingIndicator", "SimpleImputer", "KNNImputer"]
13 | 


--------------------------------------------------------------------------------
/sklearn/inspection/setup.py:
--------------------------------------------------------------------------------
 1 | from numpy.distutils.misc_util import Configuration
 2 | 
 3 | 
 4 | def configuration(parent_package="", top_path=None):
 5 |     config = Configuration("inspection", parent_package, top_path)
 6 |     # Register the private plotting subpackage together with its tests.
 7 |     config.add_subpackage("_plot")
 8 |     config.add_subpackage("_plot.tests")
 9 |     # Register the tests of the inspection package itself.
10 |     config.add_subpackage("tests")
11 |     # Hand the populated Configuration object back to the caller.
12 |     return config
13 | 
14 | 
15 | if __name__ == "__main__":
16 |     from numpy.distutils.core import setup
17 | 
18 |     setup(**configuration().todict())
19 | 


--------------------------------------------------------------------------------
/doc/contents.rst:
--------------------------------------------------------------------------------
 1 | .. include:: includes/big_toc_css.rst
 2 | .. include:: tune_toc.rst
 3 | 
 4 | .. Places global toc into the sidebar
 5 | 
 6 | :globalsidebartoc: True
 7 | 
 8 | =================
 9 | Table Of Contents
10 | =================
11 | 
12 | .. Define an order for the Table of Contents:
13 | 
14 | .. toctree::
15 |     :maxdepth: 2
16 | 
17 |     preface
18 |     tutorial/index
19 |     getting_started
20 |     user_guide
21 |     glossary
22 |     auto_examples/index
23 |     modules/classes
24 |     developers/index
25 | 


--------------------------------------------------------------------------------
/sklearn/semi_supervised/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`sklearn.semi_supervised` module implements semi-supervised learning
 3 | algorithms. These algorithms utilize small amounts of labeled data and large
 4 | amounts of unlabeled data for classification tasks. This module includes Label
 5 | Propagation, Label Spreading and a self-training classifier.
 6 | """
 7 | 
 8 | from ._label_propagation import LabelPropagation, LabelSpreading
 9 | from ._self_training import SelfTrainingClassifier
10 | 
11 | __all__ = ["SelfTrainingClassifier", "LabelPropagation", "LabelSpreading"]
12 | 


--------------------------------------------------------------------------------
/doc/templates/deprecated_class_with_call.rst:
--------------------------------------------------------------------------------
 1 | :mod:`{{module}}`.{{objname}}
 2 | {{ underline }}===============
 3 | 
 4 | .. meta::
 5 |    :robots: noindex
 6 | 
 7 | .. warning::
 8 |    **DEPRECATED**
 9 | 
10 | 
11 | .. currentmodule:: {{ module }}
12 | 
13 | .. autoclass:: {{ objname }}
14 | 
15 |    {% block methods %}
16 |    .. automethod:: __init__
17 |    .. automethod:: __call__
18 |    {% endblock %}
19 | 
20 | .. include:: {{module}}.{{objname}}.examples
21 | 
22 | .. raw:: html
23 | 
24 |     <div class="clearer"></div>
25 | 


--------------------------------------------------------------------------------
/sklearn/neighbors/tests/test_kd_tree.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pytest
 3 | 
 4 | from sklearn.neighbors._kd_tree import KDTree
 5 | 
 6 | DIMENSION = 3
 7 | 
 8 | METRICS = {"euclidean": {}, "manhattan": {}, "chebyshev": {}, "minkowski": dict(p=3)}
 9 | 
10 | 
11 | def test_array_object_type():
12 |     """Check that we do not accept object dtype array."""
13 |     # The rows have different lengths (3, 2 and 4 entries), so X cannot be a
14 |     # regular 2-D numeric array and is built with dtype=object instead.
15 |     X = np.array([(1, 2, 3), (2, 5), (5, 5, 1, 2)], dtype=object)
16 |     with pytest.raises(ValueError, match="setting an array element with a sequence"):
17 |         KDTree(X)
18 | 


--------------------------------------------------------------------------------
/sklearn/svm/src/liblinear/_cython_blas_helpers.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CYTHON_BLAS_HELPERS_H
 2 | #define _CYTHON_BLAS_HELPERS_H
 3 | 
 4 | typedef double (*dot_func)(int, double*, int, double*, int);
 5 | typedef void (*axpy_func)(int, double, double*, int, double*, int);
 6 | typedef void (*scal_func)(int, double, double*, int);
 7 | typedef double (*nrm2_func)(int, double*, int);
 8 | 
 9 | typedef struct BlasFunctions{
10 |     dot_func dot;
11 |     axpy_func axpy;
12 |     scal_func scal;
13 |     nrm2_func nrm2;
14 | } BlasFunctions;
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/sklearn/experimental/tests/test_enable_hist_gradient_boosting.py:
--------------------------------------------------------------------------------
 1 | """Tests for making sure experimental imports work as expected."""
 2 | 
 3 | import textwrap
 4 | 
 5 | from sklearn.utils._testing import assert_run_python_script
 6 | 
 7 | 
 8 | def test_import_raises_warning():
 9 |     """Importing ``enable_hist_gradient_boosting`` must emit a UserWarning.
10 | 
11 |     The snippet below is dedented and handed to ``assert_run_python_script``.
12 |     """
13 |     code = """
14 |     import pytest
15 |     with pytest.warns(UserWarning, match="it is not needed to import"):
16 |         from sklearn.experimental import enable_hist_gradient_boosting  # noqa
17 |     """
18 |     assert_run_python_script(textwrap.dedent(code))
19 | 


--------------------------------------------------------------------------------
/sklearn/inspection/__init__.py:
--------------------------------------------------------------------------------
 1 | """The :mod:`sklearn.inspection` module includes tools for model inspection."""
 2 | 
 3 | 
 4 | from ._permutation_importance import permutation_importance
 5 | 
 6 | from ._partial_dependence import partial_dependence
 7 | from ._plot.partial_dependence import plot_partial_dependence
 8 | from ._plot.partial_dependence import PartialDependenceDisplay
 9 | 
10 | 
11 | __all__ = [
12 |     "partial_dependence",
13 |     "plot_partial_dependence",
14 |     "permutation_importance",
15 |     "PartialDependenceDisplay",
16 | ]
17 | 


--------------------------------------------------------------------------------
/sklearn/feature_extraction/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`sklearn.feature_extraction` module deals with feature extraction
 3 | from raw data. It currently includes methods to extract features from text and
 4 | images.
 5 | """
 6 | 
 7 | from ._dict_vectorizer import DictVectorizer
 8 | from ._hash import FeatureHasher
 9 | from .image import img_to_graph, grid_to_graph
10 | from . import text
11 | 
12 | __all__ = [
13 |     "DictVectorizer",
14 |     "image",
15 |     "img_to_graph",
16 |     "grid_to_graph",
17 |     "text",
18 |     "FeatureHasher",
19 | ]
20 | 


--------------------------------------------------------------------------------
/doc/unsupervised_learning.rst:
--------------------------------------------------------------------------------
 1 | .. Places parent toc into the sidebar
 2 | 
 3 | :parenttoc: True
 4 | 
 5 | .. include:: includes/big_toc_css.rst
 6 | 
 7 | .. _unsupervised-learning:
 8 | 
 9 | Unsupervised learning
10 | -----------------------
11 | 
12 | .. toctree::
13 |     :maxdepth: 2
14 | 
15 |     modules/mixture
16 |     modules/manifold
17 |     modules/clustering
18 |     modules/biclustering
19 |     modules/decomposition
20 |     modules/covariance
21 |     modules/outlier_detection
22 |     modules/density
23 |     modules/neural_networks_unsupervised
24 | 


--------------------------------------------------------------------------------
/sklearn/utils/_typedefs.pxd:
--------------------------------------------------------------------------------
 1 | #!python
 2 | cimport numpy as np
 3 | 
 4 | # Floating point/data type
 5 | ctypedef np.float64_t DTYPE_t  # WARNING: should match DTYPE in typedefs.pyx
 6 | 
 7 | cdef enum:
 8 |     DTYPECODE = np.NPY_FLOAT64
 9 |     ITYPECODE = np.NPY_INTP
10 | 
11 | # Index/integer type.
12 | #  WARNING: ITYPE_t must be a signed integer type or you will have a bad time!
13 | ctypedef np.intp_t ITYPE_t  # WARNING: should match ITYPE in typedefs.pyx
14 | 
15 | # Fused type for certain operations
16 | ctypedef fused DITYPE_t:
17 |     ITYPE_t
18 |     DTYPE_t
19 | 


--------------------------------------------------------------------------------
/sklearn/tests/test_init.py:
--------------------------------------------------------------------------------
 1 | # Basic unit tests checking that the package's top-level import works
 2 | 
 3 | 
 4 | __author__ = "Yaroslav Halchenko"
 5 | __license__ = "BSD"
 6 | 
 7 | 
 8 | try:
 9 |     from sklearn import *  # noqa
10 | 
11 |     _top_import_error = None
12 | except Exception as e:
13 |     _top_import_error = e
14 | 
15 | 
16 | def test_import_skl():
17 |     # Check whether the star-import at the top of this module failed.
18 |     # "import *" has to be done at module level, hence we rely on the
19 |     # _top_import_error variable set up above.
20 |     assert _top_import_error is None
21 | 


--------------------------------------------------------------------------------
/sklearn/utils/tests/test_arpack.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from numpy.testing import assert_allclose
 3 | 
 4 | from sklearn.utils import check_random_state
 5 | from sklearn.utils._arpack import _init_arpack_v0
 6 | 
 7 | 
 8 | @pytest.mark.parametrize("seed", range(100))
 9 | def test_init_arpack_v0(seed):
10 |     # check that the initialization is a sampling from a uniform distribution
11 |     # between -1 and 1 for which we can fix the random state
12 |     size = 1000
13 |     v0 = _init_arpack_v0(size, seed)
14 | 
15 |     rng = check_random_state(seed)
16 |     assert_allclose(v0, rng.uniform(-1, 1, size=size))
17 | 


--------------------------------------------------------------------------------
/.github/workflows/check-manifest.yml:
--------------------------------------------------------------------------------
 1 | name: "Check Manifest"
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron: '0 0 * * *'
 6 | 
 7 | jobs:
 8 |   check:
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - uses: actions/checkout@v2
12 |       - uses: actions/setup-python@v2
13 |         with:
14 |           python-version: '3.9'
15 |       - name: Install dependencies
16 |         # scipy and cython are required to build sdist
17 |         run: |
18 |           python -m pip install --upgrade pip
19 |           pip install check-manifest scipy cython
20 |       - run: |
21 |           check-manifest -v
22 | 


--------------------------------------------------------------------------------
/sklearn/compose/__init__.py:
--------------------------------------------------------------------------------
 1 | """Meta-estimators for building composite models with transformers
 2 | 
 3 | In addition to its current contents, this module will eventually be home to
 4 | refurbished versions of Pipeline and FeatureUnion.
 5 | 
 6 | """
 7 | 
 8 | from ._column_transformer import (
 9 |     ColumnTransformer,
10 |     make_column_transformer,
11 |     make_column_selector,
12 | )
13 | from ._target import TransformedTargetRegressor
14 | 
15 | 
16 | __all__ = [
17 |     "ColumnTransformer",
18 |     "make_column_transformer",
19 |     "TransformedTargetRegressor",
20 |     "make_column_selector",
21 | ]
22 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | # Security Policy
 2 | 
 3 | ## Supported Versions
 4 | 
 5 | | Version   | Supported          |
 6 | | --------- | ------------------ |
 7 | | 1.0.1     | :white_check_mark: |
 8 | | < 1.0.1   | :x:                |
 9 | 
10 | ## Reporting a Vulnerability
11 | 
12 | Please report security vulnerabilities by email to `security@scikit-learn.org`.
13 | This email is an alias to a subset of the scikit-learn maintainers' team.
14 | 
15 | If the security vulnerability is accepted, a patch will be crafted privately
16 | in order to prepare a dedicated bugfix release as timely as possible (depending
17 | on the complexity of the fix).
18 | 


--------------------------------------------------------------------------------
/doc/communication_team.rst:
--------------------------------------------------------------------------------
 1 | .. raw :: html
 2 | 
 3 |     <!-- Generated by generate_authors_table.py -->
 4 |     <div class="sk-authors-container">
 5 |     <style>
 6 |       img.avatar {border-radius: 10px;}
 7 |     </style>
 8 |     <div>
 9 |     <a href='https://github.com/reshamas'><img src='https://avatars.githubusercontent.com/u/2507232?v=4' class='avatar' /></a> <br />
10 |     <p>Reshama Shaikh</p>
11 |     </div>
12 |     <div>
13 |     <a href='https://github.com/laurburke'><img src='https://avatars.githubusercontent.com/u/35973528?v=4' class='avatar' /></a> <br />
14 |     <p>Lauren Burke</p>
15 |     </div>
16 |     </div>
17 | 


--------------------------------------------------------------------------------
/doc/whats_new/changelog_legend.inc:
--------------------------------------------------------------------------------
 1 | Legend for changelogs
 2 | ---------------------
 3 | 
 4 | - |MajorFeature|: something big that you couldn't do before.
 5 | - |Feature|: something that you couldn't do before.
 6 | - |Efficiency|: an existing feature now may not require as much computation or
 7 |   memory.
 8 | - |Enhancement|: a miscellaneous minor improvement.
 9 | - |Fix|: something that previously didn't work as documented -- or according
10 |   to reasonable expectations -- should now work.
11 | - |API|: you will need to change your code to have the same effect in the
12 |   future; or a feature will be removed in the future.
13 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | """Network tests are only run if the data is already available locally,
 2 | or if download is specifically requested by an environment variable."""
 3 | import builtins
 4 | import pytest
 5 | 
 6 | 
 7 | @pytest.fixture
 8 | def hide_available_pandas(monkeypatch):
 9 |     """Pretend pandas is not installed: `import pandas` raises ImportError."""
10 |     import_orig = builtins.__import__  # real importer, used for delegation
11 | 
12 |     def mocked_import(name, *args, **kwargs):
13 |         if name == "pandas":
14 |             raise ImportError()
15 |         return import_orig(name, *args, **kwargs)
16 | 
17 |     monkeypatch.setattr(builtins, "__import__", mocked_import)
18 | 


--------------------------------------------------------------------------------
/.github/workflows/unassign.yml:
--------------------------------------------------------------------------------
 1 | name: Unassign
 2 | # Runs when an issue is unassigned and adds the 'help wanted' label if the issue is still open
 3 | on:
 4 |   issues:
 5 |     types: unassigned
 6 | 
 7 | jobs:
 8 |   one:
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - name:
12 |         if: github.event.issue.state == 'open'
13 |         run: |
14 |           echo "Marking issue ${{ github.event.issue.number }} as help wanted"
15 |           curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"labels": ["help wanted"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels
16 | 


--------------------------------------------------------------------------------
/sklearn/gaussian_process/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Author: Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>
 4 | #         Vincent Dubourg <vincent.dubourg@gmail.com>
 5 | #         (mostly translation, see implementation details)
 6 | # License: BSD 3 clause
 7 | 
 8 | """
 9 | The :mod:`sklearn.gaussian_process` module implements Gaussian Process
10 | based regression and classification.
11 | """
12 | 
13 | from ._gpr import GaussianProcessRegressor
14 | from ._gpc import GaussianProcessClassifier
15 | from . import kernels
16 | 
17 | 
18 | __all__ = ["GaussianProcessRegressor", "GaussianProcessClassifier", "kernels"]
19 | 


--------------------------------------------------------------------------------
/sklearn/manifold/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`sklearn.manifold` module implements data embedding techniques.
 3 | """
 4 | 
 5 | from ._locally_linear import locally_linear_embedding, LocallyLinearEmbedding
 6 | from ._isomap import Isomap
 7 | from ._mds import MDS, smacof
 8 | from ._spectral_embedding import SpectralEmbedding, spectral_embedding
 9 | from ._t_sne import TSNE, trustworthiness
10 | 
11 | __all__ = [
12 |     "locally_linear_embedding",
13 |     "LocallyLinearEmbedding",
14 |     "Isomap",
15 |     "MDS",
16 |     "smacof",
17 |     "SpectralEmbedding",
18 |     "spectral_embedding",
19 |     "TSNE",
20 |     "trustworthiness",
21 | ]
22 | 


--------------------------------------------------------------------------------
/sklearn/__check_build/setup.py:
--------------------------------------------------------------------------------
 1 | # Author: Virgile Fritsch <virgile.fritsch@inria.fr>
 2 | # License: BSD 3 clause
 3 | 
 4 | import numpy
 5 | 
 6 | 
 7 | def configuration(parent_package="", top_path=None):  # returns the Configuration
 8 |     from numpy.distutils.misc_util import Configuration  # imported only when called
 9 | 
10 |     config = Configuration("__check_build", parent_package, top_path)
11 |     config.add_extension(
12 |         "_check_build", sources=["_check_build.pyx"], include_dirs=[numpy.get_include()]
13 |     )
14 | 
15 |     return config
16 | 
17 | 
18 | if __name__ == "__main__":
19 |     from numpy.distutils.core import setup
20 | 
21 |     setup(**configuration(top_path="").todict())
22 | 


--------------------------------------------------------------------------------
/sklearn/preprocessing/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | 
 4 | def configuration(parent_package="", top_path=None):  # returns the Configuration
 5 |     import numpy
 6 |     from numpy.distutils.misc_util import Configuration
 7 | 
 8 |     config = Configuration("preprocessing", parent_package, top_path)
 9 |     libraries = []
10 |     if os.name == "posix":
11 |         libraries.append("m")  # link the extension against libm on POSIX
12 | 
13 |     config.add_extension(
14 |         "_csr_polynomial_expansion",
15 |         sources=["_csr_polynomial_expansion.pyx"],
16 |         include_dirs=[numpy.get_include()],
17 |         libraries=libraries,
18 |     )
19 | 
20 |     config.add_subpackage("tests")
21 | 
22 |     return config
23 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v2.3.0
 4 |     hooks:
 5 |     -   id: check-yaml
 6 |     -   id: end-of-file-fixer
 7 |     -   id: trailing-whitespace
 8 | -   repo: https://github.com/psf/black
 9 |     rev: 21.6b0
10 |     hooks:
11 |     -   id: black
12 | -   repo: https://gitlab.com/pycqa/flake8
13 |     rev: 3.9.2
14 |     hooks:
15 |     -   id: flake8
16 |         types: [file, python]
17 | -   repo: https://github.com/pre-commit/mirrors-mypy
18 |     rev: v0.782
19 |     hooks:
20 |      -  id: mypy
21 |         files: sklearn/
22 |         additional_dependencies: [pytest==6.2.4]
23 | 


--------------------------------------------------------------------------------
/.github/workflows/labeler-title-regex.yml:
--------------------------------------------------------------------------------
 1 | name: Pull Request Regex Title Labeler
 2 | on:
 3 |   pull_request_target:
 4 |     types: [opened, edited]
 5 | 
 6 | permissions:
 7 |   contents: read
 8 |   pull-requests: write
 9 | 
10 | jobs:
11 | 
12 |   labeler:
13 |     runs-on: ubuntu-20.04
14 |     steps:
15 |     - uses: actions/checkout@v2
16 |     - uses: actions/setup-python@v2
17 |       with:
18 |         python-version: '3.9'
19 |     - name: Install PyGithub
20 |       run: pip install -Uq PyGithub
21 |     - name: Label pull request
22 |       run: python .github/scripts/label_title_regex.py
23 |       env:
24 |         CONTEXT_GITHUB: ${{ toJson(github) }}
25 | 


--------------------------------------------------------------------------------
/doc/user_guide.rst:
--------------------------------------------------------------------------------
 1 | .. Places parent toc into the sidebar
 2 | 
 3 | :parenttoc: True
 4 | 
 5 | .. title:: User guide: contents
 6 | 
 7 | .. _user_guide:
 8 | 
 9 | ==========
10 | User Guide
11 | ==========
12 | 
13 | .. include:: includes/big_toc_css.rst
14 | 
15 | .. nice layout in the toc
16 | 
17 | .. include:: tune_toc.rst
18 | 
19 | .. toctree::
20 |    :numbered:
21 |    :maxdepth: 3
22 | 
23 |    supervised_learning.rst
24 |    unsupervised_learning.rst
25 |    model_selection.rst
26 |    inspection.rst
27 |    visualizations.rst
28 |    data_transforms.rst
29 |    datasets.rst
30 |    computing.rst
31 |    modules/model_persistence.rst
32 |    common_pitfalls.rst
33 | 


--------------------------------------------------------------------------------
/sklearn/utils/_fast_dict.pxd:
--------------------------------------------------------------------------------
 1 | # Author: Gael Varoquaux
 2 | # License: BSD
 3 | """
 4 | Uses C++ map containers for fast dict-like behavior with keys being
 5 | integers, and values float.
 6 | """
 7 | 
 8 | from libcpp.map cimport map as cpp_map
 9 | 
10 | # Import the C-level symbols of numpy
11 | cimport numpy as np
12 | 
13 | ctypedef np.float64_t DTYPE_t
14 | 
15 | ctypedef np.intp_t ITYPE_t
16 | 
17 | ###############################################################################
18 | # An object to be used in Python
19 | 
20 | cdef class IntFloatDict:
21 |     cdef cpp_map[ITYPE_t, DTYPE_t] my_map
22 |     cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values)
23 | 


--------------------------------------------------------------------------------
/doc/templates/redirects.html:
--------------------------------------------------------------------------------
 1 | {% set redirect = pathto(redirects[pagename]) %}
 2 | <!DOCTYPE html>
 3 | <html>
 4 |   <head>
 5 |     <meta charset="utf-8">
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
 7 |     <meta http-equiv="Refresh" content="0; url={{ redirect }}" />
 8 |     <meta name="Description" content="scikit-learn: machine learning in Python">
 9 |     <link rel="canonical" href="{{ redirect }}" />
10 |     <title>scikit-learn: machine learning in Python</title>
11 |   </head>
12 |   <body>
13 |     <p>You will be automatically redirected to the <a href="{{ redirect }}">new location of this page</a>.</p>
14 |   </body>
15 | </html>
16 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/doc_improvement.yml:
--------------------------------------------------------------------------------
 1 | name: Documentation improvement
 2 | description: Create a report to help us improve the documentation. Alternatively you can just open a pull request with the suggested change.
 3 | labels: [Documentation]
 4 | 
 5 | body:
 6 | - type: textarea
 7 |   attributes:
 8 |     label: Describe the issue linked to the documentation
 9 |     description: >
10 |       Tell us about the confusion introduced in the documentation.
11 |   validations:
12 |     required: true
13 | - type: textarea
14 |   attributes:
15 |     label: Suggest a potential alternative/fix
16 |     description: >
17 |       Tell us how we could improve the documentation in this regard.
18 | 


--------------------------------------------------------------------------------
/doc/tutorial/text_analytics/.gitignore:
--------------------------------------------------------------------------------
 1 | # cruft
 2 | .*.swp
 3 | *.pyc
 4 | .DS_Store
 5 | *.pdf
 6 | 
 7 | # folder to be used for working on the exercises
 8 | workspace
 9 | 
10 | # output of the sphinx build of the documentation
11 | tutorial/_build
12 | 
13 | # datasets to be fetched from the web and cached locally
14 | data/twenty_newsgroups/20news-bydate.tar.gz
15 | data/twenty_newsgroups/20news-bydate-train
16 | data/twenty_newsgroups/20news-bydate-test
17 | 
18 | data/movie_reviews/txt_sentoken
19 | data/movie_reviews/poldata.README.2.0
20 | 
21 | data/languages/paragraphs
22 | data/languages/short_paragraphs
23 | data/languages/html
24 | 
25 | data/labeled_faces_wild/lfw_preprocessed/
26 | 


--------------------------------------------------------------------------------
/doc/preface.rst:
--------------------------------------------------------------------------------
 1 | .. This helps define the TOC ordering for "about us" sections. Particularly
 2 |    useful for PDF output as this section is not linked from elsewhere.
 3 | 
 4 | .. Places global toc into the sidebar
 5 | 
 6 | :globalsidebartoc: True
 7 | 
 8 | .. _preface_menu:
 9 | 
10 | .. include:: includes/big_toc_css.rst
11 | .. include:: tune_toc.rst
12 | 
13 | =======================
14 | Welcome to scikit-learn
15 | =======================
16 | 
17 | |
18 | 
19 | .. toctree::
20 |     :maxdepth: 2
21 | 
22 |     install
23 |     faq
24 |     support
25 |     related_projects
26 |     about
27 |     testimonials/testimonials
28 |     whats_new
29 |     roadmap
30 |     governance
31 | 
32 | |
33 | 


--------------------------------------------------------------------------------
/sklearn/neighbors/_distance_metric.py:
--------------------------------------------------------------------------------
 1 | # TODO: Remove this file in 1.3
 2 | import warnings
 3 | 
 4 | from ..metrics import DistanceMetric as _DistanceMetric
 5 | 
 6 | 
 7 | class DistanceMetric(_DistanceMetric):  # shim kept for the old import path
 8 |     @classmethod
 9 |     def _warn(cls):
10 |         warnings.warn(
11 |             "sklearn.neighbors.DistanceMetric has been moved "
12 |             "to sklearn.metrics.DistanceMetric in 1.0. "
13 |             "This import path will be removed in 1.3",
14 |             category=FutureWarning,
15 |         )
16 | 
17 |     @classmethod
18 |     def get_metric(cls, metric, **kwargs):
19 |         DistanceMetric._warn()  # warn on every lookup through the old path
20 |         return _DistanceMetric.get_metric(metric, **kwargs)  # delegate to the new class
21 | 


--------------------------------------------------------------------------------
/sklearn/tree/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`sklearn.tree` module includes decision tree-based models for
 3 | classification and regression.
 4 | """
 5 | 
 6 | from ._classes import BaseDecisionTree
 7 | from ._classes import DecisionTreeClassifier
 8 | from ._classes import DecisionTreeRegressor
 9 | from ._classes import ExtraTreeClassifier
10 | from ._classes import ExtraTreeRegressor
11 | from ._export import export_graphviz, plot_tree, export_text
12 | 
13 | __all__ = [
14 |     "BaseDecisionTree",
15 |     "DecisionTreeClassifier",
16 |     "DecisionTreeRegressor",
17 |     "ExtraTreeClassifier",
18 |     "ExtraTreeRegressor",
19 |     "export_graphviz",
20 |     "plot_tree",
21 |     "export_text",
22 | ]
23 | 


--------------------------------------------------------------------------------
/sklearn/linear_model/_sgd_fast_helpers.h:
--------------------------------------------------------------------------------
 1 | // We cannot directly reuse the npy_isfinite from npy_math.h as numpy
 2 | // and scikit-learn are not necessarily built with the same compiler.
 3 | // When re-declaring the functions in the template for cython
 4 | // specific for each parameter input type, it needs to be 2 different functions
 5 | // as cython doesn't support function overloading.
 6 | #ifdef _MSC_VER
 7 | # include <float.h>
 8 | # define skl_isfinite _finite
 9 | # define skl_isfinite32 _finite
10 | # define skl_isfinite64 _finite
11 | #else
12 | # include <numpy/npy_math.h>
13 | # define skl_isfinite npy_isfinite
14 | # define skl_isfinite32 npy_isfinite
15 | # define skl_isfinite64 npy_isfinite
16 | #endif
17 | 


--------------------------------------------------------------------------------
/sklearn/svm/src/libsvm/LIBSVM_CHANGES:
--------------------------------------------------------------------------------
 1 | Changes to Libsvm
 2 | 
 3 | This is here mainly as a checklist for incorporation of new versions of libsvm.
 4 | 
 5 |   * Add copyright to files svm.cpp and svm.h
 6 |   * Add random_seed support and call to srand in fit function
 7 |   * Improved random number generator (fix on windows, enhancement on other
 8 |     platforms). See <https://github.com/scikit-learn/scikit-learn/pull/13511#issuecomment-481729756>
 9 |   * Invoke the SciPy BLAS API for the SVM kernel function to improve performance, with a speedup of 1.5X to 2X, for dense data only. See <https://github.com/scikit-learn/scikit-learn/pull/16530>
10 | The changes made with respect to upstream are detailed in the heading of svm.cpp
11 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Code of Conduct
 2 | 
 3 | We are a community based on openness, as well as friendly and didactic discussions.
 4 | 
 5 | We aspire to treat everybody equally, and value their contributions.
 6 | 
 7 | Decisions are made based on technical merit and consensus.
 8 | 
 9 | Code is not the only way to help the project. Reviewing pull requests,
10 | answering questions to help others on mailing lists or issues, organizing and
11 | teaching tutorials, working on the website, improving the documentation, are
12 | all priceless contributions.
13 | 
14 | We abide by the principles of openness, respect, and consideration of others of
15 | the Python Software Foundation: https://www.python.org/psf/codeofconduct/
16 | 
17 | 


--------------------------------------------------------------------------------
/sklearn/feature_extraction/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import platform
 3 | 
 4 | 
 5 | def configuration(parent_package="", top_path=None):  # returns the Configuration
 6 |     import numpy
 7 |     from numpy.distutils.misc_util import Configuration
 8 | 
 9 |     config = Configuration("feature_extraction", parent_package, top_path)
10 |     libraries = []
11 |     if os.name == "posix":
12 |         libraries.append("m")  # link the extension against libm on POSIX
13 | 
14 |     if platform.python_implementation() != "PyPy":  # extension is skipped on PyPy
15 |         config.add_extension(
16 |             "_hashing_fast",
17 |             sources=["_hashing_fast.pyx"],
18 |             include_dirs=[numpy.get_include()],
19 |             libraries=libraries,
20 |         )
21 |     config.add_subpackage("tests")
22 | 
23 |     return config
24 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
 4 | patreon: # Replace with a single Patreon username
 5 | open_collective: # Replace with a single Open Collective username
 6 | ko_fi: # Replace with a single Ko-fi username
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: ['https://numfocus.org/donate-to-scikit-learn']
13 | 


--------------------------------------------------------------------------------
/doc/authors_emeritus.rst:
--------------------------------------------------------------------------------
 1 | - Mathieu Blondel
 2 | - Matthieu Brucher
 3 | - Lars Buitinck
 4 | - David Cournapeau
 5 | - Noel Dawe
 6 | - Vincent Dubourg
 7 | - Edouard Duchesnay
 8 | - Alexander Fabisch
 9 | - Virgile Fritsch
10 | - Satrajit Ghosh
11 | - Angel Soler Gollonet
12 | - Chris Gorgolewski
13 | - Jaques Grobler
14 | - Brian Holt
15 | - Arnaud Joly
16 | - Thouis (Ray) Jones
17 | - Kyle Kastner
18 | - manoj kumar
19 | - Robert Layton
20 | - Wei Li
21 | - Paolo Losi
22 | - Gilles Louppe
23 | - Vincent Michel
24 | - Jarrod Millman
25 | - Alexandre Passos
26 | - Fabian Pedregosa
27 | - Peter Prettenhofer
28 | - (Venkat) Raghav, Rajagopalan
29 | - Jacob Schreiber
30 | - Du Shiqiao
31 | - Jake Vanderplas
32 | - David Warde-Farley
33 | - Ron Weiss
34 | 


--------------------------------------------------------------------------------
/sklearn/svm/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`sklearn.svm` module includes Support Vector Machine algorithms.
 3 | """
 4 | 
 5 | # See https://scikit-learn.org/stable/modules/svm.html for complete
 6 | # documentation.
 7 | 
 8 | # Author: Fabian Pedregosa <fabian.pedregosa@inria.fr> with help from
 9 | #         the scikit-learn community. LibSVM and LibLinear are copyright
10 | #         of their respective owners.
11 | # License: BSD 3 clause (C) INRIA 2010
12 | 
13 | from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, LinearSVR
14 | from ._bounds import l1_min_c
15 | 
16 | __all__ = [
17 |     "LinearSVC",
18 |     "LinearSVR",
19 |     "NuSVC",
20 |     "NuSVR",
21 |     "OneClassSVM",
22 |     "SVC",
23 |     "SVR",
24 |     "l1_min_c",
25 | ]
26 | 


--------------------------------------------------------------------------------
/.github/scripts/label_title_regex.py:
--------------------------------------------------------------------------------
 1 | """Labels PRs based on title. Must be run in a github action with the
 2 | pull_request_target event."""
 3 | from github import Github
 4 | import os
 5 | import json
 6 | import re
 7 | 
 8 | context_dict = json.loads(os.getenv("CONTEXT_GITHUB"))  # JSON read from the env var
 9 | 
10 | repo = context_dict["repository"]
11 | g = Github(context_dict["token"])
12 | repo = g.get_repo(repo)  # `repo` now holds what get_repo returned, not the name
13 | pr_number = context_dict["event"]["number"]
14 | issue = repo.get_issue(number=pr_number)
15 | title = issue.title
16 | 
17 | 
18 | regex_to_labels = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")]
19 | 
20 | labels_to_add = [label for regex, label in regex_to_labels if re.search(regex, title)]
21 | 
22 | if labels_to_add:  # nothing is sent when no pattern matched the title
23 |     issue.add_to_labels(*labels_to_add)
24 | 


--------------------------------------------------------------------------------
/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd:
--------------------------------------------------------------------------------
 1 | from .common cimport X_BINNED_DTYPE_C
 2 | from .common cimport BITSET_DTYPE_C
 3 | from .common cimport BITSET_INNER_DTYPE_C
 4 | from .common cimport X_DTYPE_C
 5 | 
 6 | cdef void init_bitset(BITSET_DTYPE_C bitset) nogil
 7 | 
 8 | cdef void set_bitset(BITSET_DTYPE_C bitset, X_BINNED_DTYPE_C val) nogil
 9 | 
10 | cdef unsigned char in_bitset(BITSET_DTYPE_C bitset, X_BINNED_DTYPE_C val) nogil
11 | 
12 | cpdef unsigned char in_bitset_memoryview(const BITSET_INNER_DTYPE_C[:] bitset,
13 |                                          X_BINNED_DTYPE_C val) nogil
14 | 
15 | cdef unsigned char in_bitset_2d_memoryview(
16 |     const BITSET_INNER_DTYPE_C [:, :] bitset,
17 |     X_BINNED_DTYPE_C val,
18 |     unsigned int row) nogil
19 | 


--------------------------------------------------------------------------------
/sklearn/metrics/cluster/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy
 4 | from numpy.distutils.misc_util import Configuration
 5 | 
 6 | 
 7 | def configuration(parent_package="", top_path=None):  # returns the Configuration
 8 |     config = Configuration("cluster", parent_package, top_path)
 9 |     libraries = []
10 |     if os.name == "posix":
11 |         libraries.append("m")  # link the extension against libm on POSIX
12 |     config.add_extension(
13 |         "_expected_mutual_info_fast",
14 |         sources=["_expected_mutual_info_fast.pyx"],
15 |         include_dirs=[numpy.get_include()],
16 |         libraries=libraries,
17 |     )
18 | 
19 |     config.add_subpackage("tests")
20 | 
21 |     return config
22 | 
23 | 
24 | if __name__ == "__main__":
25 |     from numpy.distutils.core import setup
26 | 
27 |     setup(**configuration().todict())
28 | 


--------------------------------------------------------------------------------
/sklearn/model_selection/tests/common.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Common utilities for testing model selection.
 3 | """
 4 | 
 5 | import numpy as np
 6 | 
 7 | from sklearn.model_selection import KFold
 8 | 
 9 | 
10 | class OneTimeSplitter:
11 |     """Wrap KFold as a cv iterator whose splits can be consumed only once"""
12 | 
13 |     def __init__(self, n_splits=4, n_samples=99):
14 |         self.n_splits = n_splits
15 |         self.n_samples = n_samples
16 |         self.indices = iter(KFold(n_splits=n_splits).split(np.ones(n_samples)))
17 | 
18 |     def split(self, X=None, y=None, groups=None):
19 |         """Yield from the one shared iterator; once exhausted, calls yield nothing"""
20 |         for index in self.indices:
21 |             yield index
22 | 
23 |     def get_n_splits(self, X=None, y=None, groups=None):
24 |         return self.n_splits
25 | 


--------------------------------------------------------------------------------
/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
 1 | # Since git version 2.23, git-blame has a feature to ignore
 2 | # certain commits.
 3 | #
 4 | # This file contains a list of commits that are not likely what
 5 | # you are looking for in `git blame`. You can set this file as
 6 | # a default ignore file for blame by running the following
 7 | # command.
 8 | #
 9 | # $ git config blame.ignoreRevsFile .git-blame-ignore-revs
10 | 
11 | # PR 18948: Migrate code style to Black
12 | 82df48934eba1df9a1ed3be98aaace8eada59e6e
13 | 
14 | # PR 20294: Use target_version >= 3.7 in Black
15 | 351ace7935a4ea685171cc6d174890f08facd561
16 | 
17 | # PR 20412: Use experimental_string_processing=true in Black
18 | 3ae7c7615343bbd36acece57825d8b0d70fd9da4
19 | 
20 | # PR 20502: Runs Black on examples
21 | 70a185ae59b4362633d18b0d0083abb1b6f7370c
22 | 


--------------------------------------------------------------------------------
/doc/supervised_learning.rst:
--------------------------------------------------------------------------------
 1 | .. Places parent toc into the sidebar
 2 | 
 3 | :parenttoc: True
 4 | 
 5 | .. include:: includes/big_toc_css.rst
 6 | 
 7 | .. _supervised-learning:
 8 | 
 9 | Supervised learning
10 | -----------------------
11 | 
12 | .. toctree::
13 |     :maxdepth: 2
14 | 
15 |     modules/linear_model
16 |     modules/lda_qda.rst
17 |     modules/kernel_ridge.rst
18 |     modules/svm
19 |     modules/sgd
20 |     modules/neighbors
21 |     modules/gaussian_process
22 |     modules/cross_decomposition.rst
23 |     modules/naive_bayes
24 |     modules/tree
25 |     modules/ensemble
26 |     modules/multiclass
27 |     modules/feature_selection.rst
28 |     modules/semi_supervised.rst
29 |     modules/isotonic.rst
30 |     modules/calibration.rst
31 |     modules/neural_networks_supervised
32 | 


--------------------------------------------------------------------------------
/sklearn/utils/tests/test_weight_vector.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pytest
 3 | from sklearn.utils._weight_vector import (
 4 |     WeightVector32,
 5 |     WeightVector64,
 6 | )
 7 | 
 8 | 
 9 | @pytest.mark.parametrize(
10 |     "dtype, WeightVector",
11 |     [
12 |         (np.float32, WeightVector32),
13 |         (np.float64, WeightVector64),
14 |     ],
15 | )
16 | def test_type_invariance(dtype, WeightVector):
17 |     """Check that `w` and `aw` of `WeightVector` have the expected `dtype`."""
18 |     weights = np.random.rand(100).astype(dtype)
19 |     average_weights = np.random.rand(100).astype(dtype)
20 | 
21 |     weight_vector = WeightVector(weights, average_weights)
22 | 
23 |     assert np.asarray(weight_vector.w).dtype is np.dtype(dtype)
24 |     assert np.asarray(weight_vector.aw).dtype is np.dtype(dtype)
25 | 


--------------------------------------------------------------------------------
/sklearn/experimental/enable_iterative_imputer.py:
--------------------------------------------------------------------------------
 1 | """Enables IterativeImputer
 2 | 
 3 | The API and results of this estimator might change without any deprecation
 4 | cycle.
 5 | 
 6 | Importing this file dynamically sets :class:`~sklearn.impute.IterativeImputer`
 7 | as an attribute of the impute module::
 8 | 
 9 |     >>> # explicitly require this experimental feature
10 |     >>> from sklearn.experimental import enable_iterative_imputer  # noqa
11 |     >>> # now you can import normally from impute
12 |     >>> from sklearn.impute import IterativeImputer
13 | """
14 | 
15 | from ..impute._iterative import IterativeImputer
16 | from .. import impute
17 | 
18 | # use setattr to avoid mypy errors when monkeypatching
19 | setattr(impute, "IterativeImputer", IterativeImputer)
20 | impute.__all__ += ["IterativeImputer"]
21 | 


--------------------------------------------------------------------------------
/.github/workflows/labeler-module.yml:
--------------------------------------------------------------------------------
 1 | name: "Pull Request Labeler"
 2 | on: pull_request_target
 3 | 
 4 | jobs:
 5 |   triage:
 6 |     runs-on: ubuntu-latest
 7 |     steps:
 8 |     - uses: thomasjpfan/labeler@v2.5.0
 9 |       continue-on-error: true
10 |       if: github.repository == 'scikit-learn/scikit-learn'
11 |       with:
12 |         repo-token: "${{ secrets.GITHUB_TOKEN }}"
13 |         max-labels: "3"
14 |         configuration-path: ".github/labeler-module.yml"
15 | 
16 |   triage_file_extensions:
17 |     runs-on: ubuntu-latest
18 |     steps:
19 |     - uses: thomasjpfan/labeler@v2.5.0
20 |       continue-on-error: true
21 |       if: github.repository == 'scikit-learn/scikit-learn'
22 |       with:
23 |         repo-token: "${{ secrets.GITHUB_TOKEN }}"
24 |         configuration-path: ".github/labeler-file-extensions.yml"


--------------------------------------------------------------------------------
/sklearn/cluster/_k_means_common.pxd:
--------------------------------------------------------------------------------
 1 | from cython cimport floating
 2 | cimport numpy as np
 3 | 
 4 | 
 5 | cdef floating _euclidean_dense_dense(floating*, floating*, int, bint) nogil
 6 | 
 7 | cdef floating _euclidean_sparse_dense(floating[::1], int[::1], floating[::1],
 8 |                                       floating, bint) nogil
 9 | 
10 | cpdef void _relocate_empty_clusters_dense(
11 |     floating[:, ::1], floating[::1], floating[:, ::1],
12 |     floating[:, ::1], floating[::1], int[::1])
13 | 
14 | cpdef void _relocate_empty_clusters_sparse(
15 |     floating[::1], int[::1], int[::1], floating[::1], floating[:, ::1],
16 |     floating[:, ::1], floating[::1], int[::1])
17 | 
18 | cdef void _average_centers(floating[:, ::1], floating[::1])
19 | 
20 | cdef void _center_shift(floating[:, ::1], floating[:, ::1], floating[::1])
21 | 


--------------------------------------------------------------------------------
/sklearn/datasets/images/README.txt:
--------------------------------------------------------------------------------
 1 | Image: china.jpg
 2 | Released under a creative commons license. [1]
 3 | Attribution: Some rights reserved by danielbuechele [2]
 4 | Retrieved 21st August, 2011 from [3] by Robert Layton
 5 | 
 6 | [1] https://creativecommons.org/licenses/by/2.0/
 7 | [2] https://www.flickr.com/photos/danielbuechele/
 8 | [3] https://www.flickr.com/photos/danielbuechele/6061409035/sizes/z/in/photostream/
 9 | 
10 | 
11 | Image: flower.jpg
12 | Released under a creative commons license. [1]
13 | Attribution: Some rights reserved by vultilion [2]
14 | Retrieved 21st August, 2011 from [3] by Robert Layton
15 | 
16 | [1] https://creativecommons.org/licenses/by/2.0/
17 | [2] https://www.flickr.com/photos/vultilion/
18 | [3] https://www.flickr.com/photos/vultilion/6056698931/sizes/z/in/photostream/
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/sklearn/datasets/descr/linnerud.rst:
--------------------------------------------------------------------------------
 1 | .. _linnerrud_dataset:
 2 | 
 3 | Linnerud dataset
 4 | ----------------
 5 | 
 6 | **Data Set Characteristics:**
 7 | 
 8 |     :Number of Instances: 20
 9 |     :Number of Attributes: 3
10 |     :Missing Attribute Values: None
11 | 
12 | The Linnerud dataset is a multi-output regression dataset. It consists of three
13 | exercise (data) and three physiological (target) variables collected from
14 | twenty middle-aged men in a fitness club:
15 | 
16 | - *physiological* - CSV containing 20 observations on 3 physiological variables:
17 |    Weight, Waist and Pulse.
18 | - *exercise* - CSV containing 20 observations on 3 exercise variables:
19 |    Chins, Situps and Jumps.
20 | 
21 | .. topic:: References
22 | 
23 |   * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris:
24 |     Editions Technic.
25 | 


--------------------------------------------------------------------------------
/sklearn/experimental/enable_hist_gradient_boosting.py:
--------------------------------------------------------------------------------
 1 | """This is now a no-op and can be safely removed from your code.
 2 | 
 3 | It used to enable the use of
 4 | :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
 5 | :class:`~sklearn.ensemble.HistGradientBoostingRegressor` when they were still
 6 | :term:`experimental`, but these estimators are now stable and can be imported
 7 | normally from `sklearn.ensemble`.
 8 | """
 9 | # Don't remove this file, we don't want to break users code just because the
10 | # feature isn't experimental anymore.
11 | 
12 | 
13 | import warnings
14 | 
15 | 
16 | warnings.warn(
17 |     "Since version 1.0, "
18 |     "it is not needed to import enable_hist_gradient_boosting anymore. "
19 |     "HistGradientBoostingClassifier and HistGradientBoostingRegressor are now "
20 |     "stable and can be normally imported from sklearn.ensemble."
21 | )
22 | 


--------------------------------------------------------------------------------
/sklearn/utils/_joblib.py:
--------------------------------------------------------------------------------
 1 | import warnings as _warnings
 2 | 
 3 | with _warnings.catch_warnings():
 4 |     _warnings.simplefilter("ignore")
 5 |     # joblib imports may raise DeprecationWarning on certain Python
 6 |     # versions
 7 |     import joblib
 8 |     from joblib import logger
 9 |     from joblib import dump, load
10 |     from joblib import __version__
11 |     from joblib import effective_n_jobs
12 |     from joblib import hash
13 |     from joblib import cpu_count, Parallel, Memory, delayed
14 |     from joblib import parallel_backend, register_parallel_backend
15 | 
16 | 
17 | __all__ = [
18 |     "parallel_backend",
19 |     "register_parallel_backend",
20 |     "cpu_count",
21 |     "Parallel",
22 |     "Memory",
23 |     "delayed",
24 |     "effective_n_jobs",
25 |     "hash",
26 |     "logger",
27 |     "dump",
28 |     "load",
29 |     "joblib",
30 |     "__version__",
31 | ]
32 | 


--------------------------------------------------------------------------------
/.github/workflows/assign.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | name: Assign
 3 | on:
 4 |   issue_comment:
 5 |     types: created
 6 | 
 7 | jobs:
 8 |   one:
 9 |     runs-on: ubuntu-latest
10 |     if: >-
11 |       (github.event.comment.body == 'take' ||
12 |        github.event.comment.body == 'Take')
13 |       && !github.event.issue.assignee
14 |     steps:
15 |       - run: |
16 |           echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
17 |           curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
18 |           curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X "DELETE" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels/help%20wanted
19 | 


--------------------------------------------------------------------------------
/sklearn/utils/_logistic_sigmoid.pyx:
--------------------------------------------------------------------------------
 1 | from libc.math cimport log, exp
 2 | 
 3 | import numpy as np
 4 | cimport numpy as np
 5 | 
 6 | np.import_array()
 7 | ctypedef np.float64_t DTYPE_t
 8 | 
 9 | 
10 | cdef inline DTYPE_t _inner_log_logistic_sigmoid(const DTYPE_t x):
11 |     """Log of the logistic sigmoid function log(1 / (1 + e ** -x))"""
12 |     if x > 0:  # here exp(-x) < 1, so the exponential cannot overflow
13 |         return -log(1. + exp(-x))
14 |     else:  # same value rewritten as x - log(1 + e ** x), where exp(x) <= 1
15 |         return x - log(1. + exp(x))
16 | 
17 | 
18 | def _log_logistic_sigmoid(unsigned int n_samples,
19 |                           unsigned int n_features,
20 |                           DTYPE_t[:, :] X,
21 |                           DTYPE_t[:, :] out):
22 |     cdef:
23 |         unsigned int i
24 |         unsigned int j
25 |     # Write the log-sigmoid of X[i, j] into out[i, j] for every visited entry.
26 |     for i in range(n_samples):
27 |         for j in range(n_features):
28 |             out[i, j] = _inner_log_logistic_sigmoid(X[i, j])
29 |     return out  # the same memoryview that was filled above
30 | 


--------------------------------------------------------------------------------
/sklearn/utils/_typedefs.pyx:
--------------------------------------------------------------------------------
 1 | #!python
 2 | 
 3 | import numpy as np
 4 | cimport numpy as np
 5 | from libc.math cimport sqrt
 6 | 
 7 | np.import_array()
 8 | 
 9 | 
10 | # use a hack to determine the associated numpy data types
11 | # NOTE: the following requires the buffer interface, only available in
12 | #       numpy 1.5+.  We'll choose the DTYPE by hand instead.
13 | #cdef ITYPE_t idummy
14 | #cdef ITYPE_t[:] idummy_view = <ITYPE_t[:1]> &idummy
15 | #ITYPE = np.asarray(idummy_view).dtype
16 | ITYPE = np.intp  # WARNING: this should match ITYPE_t in typedefs.pxd
17 | 
18 | #cdef DTYPE_t ddummy
19 | #cdef DTYPE_t[:] ddummy_view = <DTYPE_t[:1]> &ddummy
20 | #DTYPE = np.asarray(ddummy_view).dtype
21 | DTYPE = np.float64  # WARNING: this should match DTYPE_t in typedefs.pxd
22 | 
23 | # some handy constants
24 | cdef DTYPE_t INF = np.inf
25 | cdef DTYPE_t PI = np.pi
26 | cdef DTYPE_t ROOT_2PI = sqrt(2 * PI)
27 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
 1 | name: Feature request
 2 | description: Suggest a new algorithm, enhancement to an existing algorithm, etc.
 3 | labels: ['New Feature']
 4 | 
 5 | body:
 6 | - type: markdown
 7 |   attributes:
 8 |     value: >
 9 |       #### If you want to propose a new algorithm, please refer first to the [scikit-learn inclusion criterion](https://scikit-learn.org/stable/faq.html#what-are-the-inclusion-criteria-for-new-algorithms).
10 | - type: textarea
11 |   attributes:
12 |     label: Describe the workflow you want to enable
13 |   validations:
14 |     required: true
15 | - type: textarea
16 |   attributes:
17 |     label: Describe your proposed solution
18 |   validations:
19 |     required: true
20 | - type: textarea
21 |   attributes:
22 |     label: Describe alternatives you've considered, if relevant
23 | - type: textarea
24 |   attributes:
25 |     label: Additional context
26 | 


--------------------------------------------------------------------------------
/sklearn/datasets/setup.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import os
 3 | import platform
 4 | 
 5 | 
 6 | def configuration(parent_package="", top_path=None):
 7 |     from numpy.distutils.misc_util import Configuration
 8 | 
 9 |     config = Configuration("datasets", parent_package, top_path)
10 |     config.add_data_dir("data")
11 |     config.add_data_dir("descr")
12 |     config.add_data_dir("images")
13 |     config.add_data_dir(os.path.join("tests", "data"))
14 |     if platform.python_implementation() != "PyPy":
15 |         config.add_extension(
16 |             "_svmlight_format_fast",
17 |             sources=["_svmlight_format_fast.pyx"],
18 |             include_dirs=[numpy.get_include()],
19 |         )
20 |     config.add_subpackage("tests")
21 |     return config
22 | 
23 | 
24 | if __name__ == "__main__":
25 |     from numpy.distutils.core import setup
26 | 
27 |     setup(**configuration(top_path="").todict())
28 | 


--------------------------------------------------------------------------------
/doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py:
--------------------------------------------------------------------------------
 1 | """Script to download the movie review dataset"""
 2 | 
 3 | import os
 4 | import tarfile
 5 | from contextlib import closing
 6 | from urllib.request import urlopen
 7 | 
 8 | 
 9 | URL = ("http://www.cs.cornell.edu/people/pabo/"
10 |        "movie-review-data/review_polarity.tar.gz")
11 | 
12 | ARCHIVE_NAME = URL.rsplit('/', 1)[1]
13 | DATA_FOLDER = "txt_sentoken"
14 | 
15 | 
16 | if not os.path.exists(DATA_FOLDER):
17 | 
18 |     if not os.path.exists(ARCHIVE_NAME):
19 |         print("Downloading dataset from %s (3 MB)" % URL)
20 |         opener = urlopen(URL)
21 |         with open(ARCHIVE_NAME, 'wb') as archive:
22 |             archive.write(opener.read())
23 | 
24 |     print("Decompressing %s" % ARCHIVE_NAME)
25 |     with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive:
26 |         archive.extractall(path='.')
27 |     os.remove(ARCHIVE_NAME)
28 | 


--------------------------------------------------------------------------------
/asv_benchmarks/benchmarks/svm.py:
--------------------------------------------------------------------------------
 1 | from sklearn.svm import SVC
 2 | 
 3 | from .common import Benchmark, Estimator, Predictor
 4 | from .datasets import _synth_classification_dataset
 5 | from .utils import make_gen_classif_scorers
 6 | 
 7 | 
 8 | class SVCBenchmark(Predictor, Estimator, Benchmark):
 9 |     """Benchmarks for SVC."""
10 | 
11 |     param_names = ["kernel"]
12 |     params = (["linear", "poly", "rbf", "sigmoid"],)
13 | 
14 |     def setup_cache(self):
15 |         super().setup_cache()
16 | 
17 |     def make_data(self, params):
18 |         return _synth_classification_dataset()
19 | 
20 |     def make_estimator(self, params):
21 |         (kernel,) = params
22 | 
23 |         estimator = SVC(
24 |             max_iter=100, tol=1e-16, kernel=kernel, random_state=0, gamma="scale"
25 |         )
26 | 
27 |         return estimator
28 | 
29 |     def make_scorers(self):
30 |         make_gen_classif_scorers(self)
31 | 


--------------------------------------------------------------------------------
/examples/decomposition/plot_beta_divergence.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ==============================
 3 | Beta-divergence loss functions
 4 | ==============================
 5 | 
 6 | A plot that compares the various Beta-divergence loss functions supported by
 7 | the Multiplicative-Update ('mu') solver in :class:`~sklearn.decomposition.NMF`.
 8 | 
 9 | """
10 | 
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | from sklearn.decomposition._nmf import _beta_divergence
14 | 
15 | x = np.linspace(0.001, 4, 1000)
16 | y = np.zeros(x.shape)
17 | 
18 | colors = "mbgyr"
19 | for j, beta in enumerate((0.0, 0.5, 1.0, 1.5, 2.0)):
20 |     for i, xi in enumerate(x):
21 |         y[i] = _beta_divergence(1, xi, 1, beta)
22 |     name = "beta = %1.1f" % beta
23 |     plt.plot(x, y, label=name, color=colors[j])
24 | 
25 | plt.xlabel("x")
26 | plt.title("beta-divergence(1, x)")
27 | plt.legend(loc=0)
28 | plt.axis([0, 4, 0, 3])
29 | plt.show()
30 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
 1 | blank_issues_enabled: true
 2 | contact_links:
 3 |   - name: Discussions
 4 |     url: https://github.com/scikit-learn/scikit-learn/discussions/new
 5 |     about: Ask questions and discuss with other scikit-learn community members
 6 |   - name: Stack Overflow
 7 |     url: https://stackoverflow.com/questions/tagged/scikit-learn
 8 |     about: Please ask and answer usage questions on Stack Overflow
 9 |   - name: Mailing list
10 |     url: https://mail.python.org/mailman/listinfo/scikit-learn
11 |     about: General discussions and announcements on the mailing list
12 |   - name: Gitter
13 |     url: https://gitter.im/scikit-learn/scikit-learn
14 |     about: Users and developers can sometimes be found on the gitter channel
15 |   - name: Blank issue
16 |     url: https://github.com/scikit-learn/scikit-learn/issues/new
17 |     about: Please note that Github Discussions should be used in most cases instead
18 | 


--------------------------------------------------------------------------------
/doc/includes/big_toc_css.rst:
--------------------------------------------------------------------------------
 1 | ..  
 2 |     File to ..include in a document with a big table of content, to give
 3 |     it 'style'
 4 | 
 5 | .. raw:: html
 6 | 
 7 |   <style type="text/css">
 8 |     div.body div.toctree-wrapper ul {
 9 |         padding-left: 0;
10 |     }
11 | 
12 |     div.body li.toctree-l1 {
13 |         padding: 0 0 0.5em 0;
14 |         list-style-type: none;
15 |         font-size: 150%;
16 |         font-weight: bold;
17 |     }
18 | 
19 |     div.body li.toctree-l2 {
20 |         font-size: 70%;
21 |         list-style-type: square;
22 |         font-weight: normal;
23 |         margin-left: 40px;
24 |     }
25 | 
26 |     div.body li.toctree-l3 {
27 |         font-size: 85%;
28 |         list-style-type: circle;
29 |         font-weight: normal;
30 |         margin-left: 40px;
31 |     }
32 | 
33 |     div.body li.toctree-l4 {
34 |         margin-left: 40px;
35 |     }
36 |  
37 |   </style>
38 | 
39 | 
40 | 
41 | 


--------------------------------------------------------------------------------
/sklearn/utils/tests/test_arrayfuncs.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import numpy as np
 3 | 
 4 | from sklearn.utils._testing import assert_allclose
 5 | from sklearn.utils.arrayfuncs import min_pos
 6 | 
 7 | 
 8 | def test_min_pos():
 9 |     # Check that min_pos returns a positive value and that it's consistent
10 |     # between float and double
11 |     X = np.random.RandomState(0).randn(100)
12 | 
13 |     min_double = min_pos(X)
14 |     min_float = min_pos(X.astype(np.float32))
15 | 
16 |     assert_allclose(min_double, min_float)
17 |     assert min_double >= 0
18 | 
19 | 
20 | @pytest.mark.parametrize("dtype", [np.float32, np.float64])
21 | def test_min_pos_no_positive(dtype):
22 |     # Check that the return value of min_pos is the maximum representable
23 |     # value of the input dtype when all input elements are <= 0 (#19328)
24 |     X = np.full(100, -1.0).astype(dtype, copy=False)
25 | 
26 |     assert min_pos(X) == np.finfo(dtype).max
27 | 


--------------------------------------------------------------------------------
/sklearn/utils/tests/test_optimize.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from sklearn.utils.optimize import _newton_cg
 4 | from scipy.optimize import fmin_ncg
 5 | 
 6 | from sklearn.utils._testing import assert_array_almost_equal
 7 | 
 8 | 
 9 | def test_newton_cg():
10 |     # Test that newton_cg gives same result as scipy's fmin_ncg
11 | 
12 |     rng = np.random.RandomState(0)
13 |     A = rng.normal(size=(10, 10))
14 |     x0 = np.ones(10)
15 | 
16 |     def func(x):
17 |         Ax = A.dot(x)
18 |         return 0.5 * (Ax).dot(Ax)
19 | 
20 |     def grad(x):
21 |         return A.T.dot(A.dot(x))
22 | 
23 |     def hess(x, p):
24 |         return p.dot(A.T.dot(A.dot(x.all())))
25 | 
26 |     def grad_hess(x):
27 |         return grad(x), lambda x: A.T.dot(A.dot(x))
28 | 
29 |     assert_array_almost_equal(
30 |         _newton_cg(grad_hess, func, grad, x0, tol=1e-10)[0],
31 |         fmin_ncg(f=func, x0=x0, fprime=grad, fhess_p=hess),
32 |     )
33 | 


--------------------------------------------------------------------------------
/sklearn/linear_model/_sgd_fast.pxd:
--------------------------------------------------------------------------------
 1 | # License: BSD 3 clause
 2 | """Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx"""
 3 | 
 4 | cdef class LossFunction:
 5 |     cdef double loss(self, double p, double y) nogil
 6 |     cdef double dloss(self, double p, double y) nogil
 7 | 
 8 | 
 9 | cdef class Regression(LossFunction):
10 |     cdef double loss(self, double p, double y) nogil
11 |     cdef double dloss(self, double p, double y) nogil
12 | 
13 | 
14 | cdef class Classification(LossFunction):
15 |     cdef double loss(self, double p, double y) nogil
16 |     cdef double dloss(self, double p, double y) nogil
17 | 
18 | 
19 | cdef class Log(Classification):
20 |     cdef double loss(self, double p, double y) nogil
21 |     cdef double dloss(self, double p, double y) nogil
22 | 
23 | 
24 | cdef class SquaredLoss(Regression):
25 |     cdef double loss(self, double p, double y) nogil
26 |     cdef double dloss(self, double p, double y) nogil
27 | 


--------------------------------------------------------------------------------
/sklearn/utils/tests/test_cython_templating.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | import pytest
 3 | import sklearn
 4 | 
 5 | 
 6 | def test_files_generated_by_templates_are_git_ignored():
 7 |     """Check the consistence of the files generated from template files."""
 8 |     gitignore_file = pathlib.Path(sklearn.__file__).parent.parent / ".gitignore"
 9 |     if not gitignore_file.exists():
10 |         pytest.skip("Tests are not run from the source folder")
11 | 
12 |     base_dir = pathlib.Path(sklearn.__file__).parent
13 |     ignored_files = gitignore_file.read_text().split("\n")
14 |     ignored_files = [pathlib.Path(line) for line in ignored_files]
15 | 
16 |     for filename in base_dir.glob("**/*.tp"):
17 |         filename = filename.relative_to(base_dir.parent)
18 |         # From "path/to/template.p??.tp" to "path/to/template.p??"
19 |         filename_wo_tempita_suffix = filename.with_suffix("")
20 |         assert filename_wo_tempita_suffix in ignored_files
21 | 


--------------------------------------------------------------------------------
/sklearn/svm/src/liblinear/tron.h:
--------------------------------------------------------------------------------
 1 | #ifndef _TRON_H
 2 | #define _TRON_H
 3 | 
 4 | #include "_cython_blas_helpers.h"
 5 | 
 6 | class function
 7 | {
 8 | public:
 9 | 	virtual double fun(double *w) = 0 ;
10 | 	virtual void grad(double *w, double *g) = 0 ;
11 | 	virtual void Hv(double *s, double *Hs) = 0 ;
12 | 
13 | 	virtual int get_nr_variable(void) = 0 ;
14 | 	virtual ~function(void){}
15 | };
16 | 
17 | class TRON
18 | {
19 | public:
20 | 	TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000, BlasFunctions *blas = 0);
21 | 	~TRON();
22 | 
23 | 	int tron(double *w);
24 | 	void set_print_string(void (*i_print) (const char *buf));
25 | 
26 | private:
27 | 	int trcg(double delta, double *g, double *s, double *r);
28 | 	double norm_inf(int n, double *x);
29 | 
30 | 	double eps;
31 | 	int max_iter;
32 | 	function *fun_obj;
33 | 	BlasFunctions *blas;
34 | 	void info(const char *fmt,...);
35 | 	void (*tron_print_string)(const char *buf);
36 | };
37 | #endif
38 | 


--------------------------------------------------------------------------------
/sklearn/utils/murmurhash.pxd:
--------------------------------------------------------------------------------
 1 | """Export fast murmurhash C/C++ routines + cython wrappers"""
 2 | 
 3 | cimport numpy as np
 4 | 
 5 | # The C API is disabled for now, since it requires -I flags to get
 6 | # compilation to work even when these functions are not used.
 7 | #cdef extern from "MurmurHash3.h":
 8 | #    void MurmurHash3_x86_32(void* key, int len, unsigned int seed,
 9 | #                            void* out)
10 | #
11 | #    void MurmurHash3_x86_128(void* key, int len, unsigned int seed,
12 | #                             void* out)
13 | #
14 | #    void MurmurHash3_x64_128(void* key, int len, unsigned int seed,
15 | #                             void* out)
16 | 
17 | 
18 | cpdef np.uint32_t murmurhash3_int_u32(int key, unsigned int seed)
19 | cpdef np.int32_t murmurhash3_int_s32(int key, unsigned int seed)
20 | cpdef np.uint32_t murmurhash3_bytes_u32(bytes key, unsigned int seed)
21 | cpdef np.int32_t murmurhash3_bytes_s32(bytes key, unsigned int seed)
22 | 


--------------------------------------------------------------------------------
/sklearn/decomposition/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy
 3 | from numpy.distutils.misc_util import Configuration
 4 | 
 5 | 
 6 | def configuration(parent_package="", top_path=None):
 7 |     config = Configuration("decomposition", parent_package, top_path)
 8 | 
 9 |     libraries = []
10 |     if os.name == "posix":
11 |         libraries.append("m")
12 | 
13 |     config.add_extension(
14 |         "_online_lda_fast",
15 |         sources=["_online_lda_fast.pyx"],
16 |         include_dirs=[numpy.get_include()],
17 |         libraries=libraries,
18 |     )
19 | 
20 |     config.add_extension(
21 |         "_cdnmf_fast",
22 |         sources=["_cdnmf_fast.pyx"],
23 |         include_dirs=[numpy.get_include()],
24 |         libraries=libraries,
25 |     )
26 | 
27 |     config.add_subpackage("tests")
28 | 
29 |     return config
30 | 
31 | 
32 | if __name__ == "__main__":
33 |     from numpy.distutils.core import setup
34 | 
35 |     setup(**configuration().todict())
36 | 


--------------------------------------------------------------------------------
/doc/tutorial/index.rst:
--------------------------------------------------------------------------------
 1 | .. Places global toc into the sidebar
 2 | 
 3 | :globalsidebartoc: True
 4 | 
 5 | .. _tutorial_menu:
 6 | 
 7 | 
 8 | .. include:: ../includes/big_toc_css.rst
 9 | .. include:: ../tune_toc.rst
10 | 
11 | ======================
12 | scikit-learn Tutorials
13 | ======================
14 | 
15 | |
16 | 
17 | .. toctree::
18 |    :maxdepth: 2
19 | 
20 |    basic/tutorial.rst
21 |    statistical_inference/index.rst
22 |    text_analytics/working_with_text_data.rst
23 |    machine_learning_map/index
24 |    ../presentations
25 | 
26 | |
27 | 
28 | .. note:: **Doctest Mode**
29 | 
30 |    The code-examples in the above tutorials are written in a
31 |    *python-console* format. If you wish to easily execute these examples
32 |    in **IPython**, use::
33 | 
34 | 	%doctest_mode
35 | 
36 |    in the IPython-console. You can then simply copy and paste the examples
37 |    directly into IPython without having to worry about removing the **>>>**
38 |    manually.
39 | 


--------------------------------------------------------------------------------
/.github/workflows/twitter.yml:
--------------------------------------------------------------------------------
 1 | # Tweet the URL of a commit on @sklearn_commits whenever a push event
 2 | # happens on the main branch
 3 | name: Twitter Push Notification
 4 | 
 5 | 
 6 | on:
 7 |   push:
 8 |     branches:
 9 |       - main
10 | 
11 | 
12 | jobs:
13 |   tweet:
14 |     name: Twitter Notification
15 |     runs-on: ubuntu-latest
16 |     steps:
17 |       - name: Tweet URL of last commit as @sklearn_commits
18 |         if: github.repository == 'scikit-learn/scikit-learn'
19 |         uses: docker://thomasjpfan/twitter-action:0.3
20 |         with:
21 |           args: "-message \"https://github.com/scikit-learn/scikit-learn/commit/${{ github.sha }}\""
22 |         env:
23 |           TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
24 |           TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
25 |           TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
26 |           TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }}
27 | 


--------------------------------------------------------------------------------
/sklearn/utils/tests/test_fast_dict.py:
--------------------------------------------------------------------------------
 1 | """ Test fast_dict.
 2 | """
 3 | import numpy as np
 4 | 
 5 | from sklearn.utils._fast_dict import IntFloatDict, argmin
 6 | 
 7 | 
 8 | def test_int_float_dict():
 9 |     rng = np.random.RandomState(0)
10 |     keys = np.unique(rng.randint(100, size=10).astype(np.intp))
11 |     values = rng.rand(len(keys))
12 | 
13 |     d = IntFloatDict(keys, values)
14 |     for key, value in zip(keys, values):
15 |         assert d[key] == value
16 |     assert len(d) == len(keys)
17 | 
18 |     d.append(120, 3.0)
19 |     assert d[120] == 3.0
20 |     assert len(d) == len(keys) + 1
21 |     for i in range(2000):
22 |         d.append(i + 1000, 4.0)
23 |     assert d[1100] == 4.0
24 | 
25 | 
26 | def test_int_float_dict_argmin():
27 |     # Test the argmin implementation on the IntFloatDict
28 |     keys = np.arange(100, dtype=np.intp)
29 |     values = np.arange(100, dtype=np.float64)
30 |     d = IntFloatDict(keys, values)
31 |     assert argmin(d) == (0, 0)
32 | 


--------------------------------------------------------------------------------
/asv_benchmarks/benchmarks/manifold.py:
--------------------------------------------------------------------------------
 1 | from sklearn.manifold import TSNE
 2 | 
 3 | from .common import Benchmark, Estimator
 4 | from .datasets import _digits_dataset
 5 | 
 6 | 
 7 | class TSNEBenchmark(Estimator, Benchmark):
 8 |     """
 9 |     Benchmarks for t-SNE.
10 |     """
11 | 
12 |     param_names = ["method"]
13 |     params = (["exact", "barnes_hut"],)
14 | 
15 |     def setup_cache(self):
16 |         super().setup_cache()
17 | 
18 |     def make_data(self, params):
19 |         (method,) = params
20 | 
21 |         n_samples = 500 if method == "exact" else None
22 | 
23 |         return _digits_dataset(n_samples=n_samples)
24 | 
25 |     def make_estimator(self, params):
26 |         (method,) = params
27 | 
28 |         estimator = TSNE(random_state=0, method=method)
29 | 
30 |         return estimator
31 | 
32 |     def make_scorers(self):
33 |         self.train_scorer = lambda _, __: self.estimator.kl_divergence_
34 |         self.test_scorer = lambda _, __: self.estimator.kl_divergence_
35 | 


--------------------------------------------------------------------------------
/sklearn/neural_network/tests/test_base.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import numpy as np
 3 | 
 4 | from sklearn.neural_network._base import binary_log_loss
 5 | from sklearn.neural_network._base import log_loss
 6 | 
 7 | 
 8 | def test_binary_log_loss_1_prob_finite():
 9 |     # y_proba is equal to one should result in a finite logloss
10 |     y_true = np.array([[0, 0, 1]]).T
11 |     y_prob = np.array([[0.9, 1.0, 1.0]]).T
12 | 
13 |     loss = binary_log_loss(y_true, y_prob)
14 |     assert np.isfinite(loss)
15 | 
16 | 
17 | @pytest.mark.parametrize(
18 |     "y_true, y_prob",
19 |     [
20 |         (
21 |             np.array([[1, 0, 0], [0, 1, 0]]),
22 |             np.array([[0.0, 1.0, 0.0], [0.9, 0.05, 0.05]]),
23 |         ),
24 |         (np.array([[0, 0, 1]]).T, np.array([[0.9, 1.0, 1.0]]).T),
25 |     ],
26 | )
27 | def test_log_loss_1_prob_finite(y_true, y_prob):
28 |     # y_prob equal to 1 should result in a finite log loss
29 |     loss = log_loss(y_true, y_prob)
30 |     assert np.isfinite(loss)
31 | 


--------------------------------------------------------------------------------
/doc/tutorial/text_analytics/data/twenty_newsgroups/fetch_data.py:
--------------------------------------------------------------------------------
 1 | """Script to download the 20 newsgroups text classification set"""
 2 | 
 3 | import os
 4 | import tarfile
 5 | from contextlib import closing
 6 | from urllib.request import urlopen
 7 | 
 8 | URL = ("http://people.csail.mit.edu/jrennie/"
 9 |        "20Newsgroups/20news-bydate.tar.gz")
10 | 
11 | ARCHIVE_NAME = URL.rsplit('/', 1)[1]
12 | TRAIN_FOLDER = "20news-bydate-train"
13 | TEST_FOLDER = "20news-bydate-test"
14 | 
15 | 
16 | if not os.path.exists(TRAIN_FOLDER) or not os.path.exists(TEST_FOLDER):
17 | 
18 |     if not os.path.exists(ARCHIVE_NAME):
19 |         print("Downloading dataset from %s (14 MB)" % URL)
20 |         opener = urlopen(URL)
21 |         with open(ARCHIVE_NAME, 'wb') as archive:
22 |             archive.write(opener.read())
23 | 
24 |     print("Decompressing %s" % ARCHIVE_NAME)
25 |     with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive:
26 |         archive.extractall(path='.')
27 |     os.remove(ARCHIVE_NAME)
28 | 


--------------------------------------------------------------------------------
/examples/model_selection/plot_cv_predict.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ====================================
 3 | Plotting Cross-Validated Predictions
 4 | ====================================
 5 | 
 6 | This example shows how to use
 7 | :func:`~sklearn.model_selection.cross_val_predict` to visualize prediction
 8 | errors.
 9 | 
10 | """
11 | 
12 | from sklearn import datasets
13 | from sklearn.model_selection import cross_val_predict
14 | from sklearn import linear_model
15 | import matplotlib.pyplot as plt
16 | 
17 | lr = linear_model.LinearRegression()
18 | X, y = datasets.load_diabetes(return_X_y=True)
19 | 
20 | # cross_val_predict returns an array of the same size as `y` where each entry
21 | # is a prediction obtained by cross validation:
22 | predicted = cross_val_predict(lr, X, y, cv=10)
23 | 
24 | fig, ax = plt.subplots()
25 | ax.scatter(y, predicted, edgecolors=(0, 0, 0))
26 | ax.plot([y.min(), y.max()], [y.min(), y.max()], "k--", lw=4)
27 | ax.set_xlabel("Measured")
28 | ax.set_ylabel("Predicted")
29 | plt.show()
30 | 


--------------------------------------------------------------------------------
/benchmarks/plot_tsne_mnist.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | import os.path as op
 4 | 
 5 | import argparse
 6 | 
 7 | 
 8 | LOG_DIR = "mnist_tsne_output"
 9 | 
10 | 
11 | if __name__ == "__main__":
12 |     parser = argparse.ArgumentParser("Plot benchmark results for t-SNE")
13 |     parser.add_argument(
14 |         "--labels",
15 |         type=str,
16 |         default=op.join(LOG_DIR, "mnist_original_labels_10000.npy"),
17 |         help="1D integer numpy array for labels",
18 |     )
19 |     parser.add_argument(
20 |         "--embedding",
21 |         type=str,
22 |         default=op.join(LOG_DIR, "mnist_sklearn_TSNE_10000.npy"),
23 |         help="2D float numpy array for embedded data",
24 |     )
25 |     args = parser.parse_args()
26 | 
27 |     X = np.load(args.embedding)
28 |     y = np.load(args.labels)
29 | 
30 |     for i in np.unique(y):
31 |         mask = y == i
32 |         plt.scatter(X[mask, 0], X[mask, 1], alpha=0.2, label=int(i))
33 |     plt.legend(loc="best")
34 |     plt.show()
35 | 


--------------------------------------------------------------------------------
/sklearn/manifold/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy
 4 | 
 5 | 
 6 | def configuration(parent_package="", top_path=None):
 7 |     from numpy.distutils.misc_util import Configuration
 8 | 
 9 |     config = Configuration("manifold", parent_package, top_path)
10 | 
11 |     libraries = []
12 |     if os.name == "posix":
13 |         libraries.append("m")
14 | 
15 |     config.add_extension(
16 |         "_utils",
17 |         sources=["_utils.pyx"],
18 |         include_dirs=[numpy.get_include()],
19 |         libraries=libraries,
20 |         extra_compile_args=["-O3"],
21 |     )
22 | 
23 |     config.add_extension(
24 |         "_barnes_hut_tsne",
25 |         sources=["_barnes_hut_tsne.pyx"],
26 |         include_dirs=[numpy.get_include()],
27 |         libraries=libraries,
28 |         extra_compile_args=["-O3"],
29 |     )
30 | 
31 |     config.add_subpackage("tests")
32 | 
33 |     return config
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     from numpy.distutils.core import setup
38 | 
39 |     setup(**configuration().todict())
40 | 


--------------------------------------------------------------------------------
/sklearn/metrics/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | 
 4 | from numpy.distutils.misc_util import Configuration
 5 | 
 6 | 
 7 | def configuration(parent_package="", top_path=None):
 8 |     config = Configuration("metrics", parent_package, top_path)
 9 | 
10 |     libraries = []
11 |     if os.name == "posix":
12 |         libraries.append("m")
13 | 
14 |     config.add_subpackage("_plot")
15 |     config.add_subpackage("_plot.tests")
16 |     config.add_subpackage("cluster")
17 | 
18 |     config.add_extension(
19 |         "_pairwise_fast", sources=["_pairwise_fast.pyx"], libraries=libraries
20 |     )
21 | 
22 |     config.add_extension(
23 |         "_dist_metrics",
24 |         sources=["_dist_metrics.pyx"],
25 |         include_dirs=[np.get_include(), os.path.join(np.get_include(), "numpy")],
26 |         libraries=libraries,
27 |     )
28 | 
29 |     config.add_subpackage("tests")
30 | 
31 |     return config
32 | 
33 | 
34 | if __name__ == "__main__":
35 |     from numpy.distutils.core import setup
36 | 
37 |     setup(**configuration().todict())
38 | 


--------------------------------------------------------------------------------
/.codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         # Commits pushed to main should not make the overall
 8 |         # project coverage decrease by more than 1%:
 9 |         target: auto
10 |         threshold: 1%
11 |     patch:
12 |       default:
13 |         # Be tolerant on slight code coverage diff on PRs to limit
14 |         # noisy red coverage status on github PRs.
15 |         # Note: The coverage stats are still uploaded
16 |         # to codecov so that PR reviewers can see uncovered lines
17 |         target: auto
18 |         threshold: 1%
19 | 
20 | codecov:
21 |   notify:
22 |     # Prevent the coverage status from being uploaded multiple times for
23 |     # parallel and long-running CI pipelines. This configuration is
24 |     # particularly useful on PRs to avoid confusion. Note that this value is
25 |     # set to the number of Azure Pipeline jobs uploading coverage reports.
26 |     after_n_builds: 6
27 | 
28 | ignore:
29 | - "sklearn/externals"
30 | - "sklearn/_build_utils"
31 | - "**/setup.py"
32 | 


--------------------------------------------------------------------------------
/sklearn/cluster/tests/common.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Common utilities for testing clustering.
 3 | 
 4 | """
 5 | 
 6 | import numpy as np
 7 | 
 8 | 
 9 | ###############################################################################
10 | # Generate sample data
11 | 
12 | 
13 | def generate_clustered_data(
14 |     seed=0, n_clusters=3, n_features=2, n_samples_per_cluster=20, std=0.4
15 | ):
16 |     prng = np.random.RandomState(seed)
17 | 
18 |     # the data is voluntarily shifted away from zero to check clustering
19 |     # algorithm robustness with regard to non-centered data
20 |     means = (
21 |         np.array(
22 |             [
23 |                 [1, 1, 1, 0],
24 |                 [-1, -1, 0, 1],
25 |                 [1, -1, 1, 1],
26 |                 [-1, 1, 1, 0],
27 |             ]
28 |         )
29 |         + 10
30 |     )
31 | 
32 |     X = np.empty((0, n_features))
33 |     for i in range(n_clusters):
34 |         X = np.r_[
35 |             X,
36 |             means[i][:n_features] + std * prng.randn(n_samples_per_cluster, n_features),
37 |         ]
38 |     return X
39 | 


--------------------------------------------------------------------------------
/examples/datasets/plot_digits_last_image.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | =========================================================
 4 | The Digit Dataset
 5 | =========================================================
 6 | 
 7 | This dataset is made up of 1797 8x8 images. Each image,
 8 | like the one shown below, is of a hand-written digit.
 9 | In order to utilize an 8x8 figure like this, we'd have to
10 | first transform it into a feature vector with length 64.
11 | 
12 | See `here
13 | <https://archive.ics.uci.edu/ml/datasets/Pen-Based+Recognition+of+Handwritten+Digits>`_
14 | for more information about this dataset.
15 | 
16 | """
17 | 
18 | # Code source: Gaël Varoquaux
19 | # Modified for documentation by Jaques Grobler
20 | # License: BSD 3 clause
21 | 
22 | from sklearn import datasets
23 | 
24 | import matplotlib.pyplot as plt
25 | 
26 | # Load the digits dataset
27 | digits = datasets.load_digits()
28 | 
29 | # Display the last digit
30 | plt.figure(1, figsize=(3, 3))
31 | plt.imshow(digits.images[-1], cmap=plt.cm.gray_r, interpolation="nearest")
32 | plt.show()
33 | 


--------------------------------------------------------------------------------
/doc/whats_new.rst:
--------------------------------------------------------------------------------
 1 | .. currentmodule:: sklearn
 2 | .. include:: whats_new/_contributors.rst
 3 | 
 4 | Release History
 5 | ===============
 6 | 
 7 | Release notes for all scikit-learn releases are linked in this page.
 8 | 
 9 | **Tip:** `Subscribe to scikit-learn releases <https://libraries.io/pypi/scikit-learn>`__
10 | on libraries.io to be notified when new versions are released.
11 | 
12 | .. toctree::
13 |     :maxdepth: 1
14 | 
15 |     Version 1.1 <whats_new/v1.1.rst>
16 |     Version 1.0 <whats_new/v1.0.rst>
17 |     Version 0.24 <whats_new/v0.24.rst>
18 |     Version 0.23 <whats_new/v0.23.rst>
19 |     Version 0.22 <whats_new/v0.22.rst>
20 |     Version 0.21 <whats_new/v0.21.rst>
21 |     Version 0.20 <whats_new/v0.20.rst>
22 |     Version 0.19 <whats_new/v0.19.rst>
23 |     Version 0.18 <whats_new/v0.18.rst>
24 |     Version 0.17 <whats_new/v0.17.rst>
25 |     Version 0.16 <whats_new/v0.16.rst>
26 |     Version 0.15 <whats_new/v0.15.rst>
27 |     Version 0.14 <whats_new/v0.14.rst>
28 |     Version 0.13 <whats_new/v0.13.rst>
29 |     Older Versions <whats_new/older_versions.rst>
30 | 


--------------------------------------------------------------------------------
/sklearn/datasets/tests/test_olivetti_faces.py:
--------------------------------------------------------------------------------
 1 | """Test Olivetti faces fetcher, if the data is available,
 2 | or if specifically requested via environment variable
 3 | (e.g. for travis cron job)."""
 4 | 
 5 | import numpy as np
 6 | 
 7 | from sklearn.utils import Bunch
 8 | from sklearn.datasets.tests.test_common import check_return_X_y
 9 | 
10 | from sklearn.utils._testing import assert_array_equal
11 | 
12 | 
13 | def test_olivetti_faces(fetch_olivetti_faces_fxt):
14 |     data = fetch_olivetti_faces_fxt(shuffle=True, random_state=0)
15 | 
16 |     assert isinstance(data, Bunch)
17 |     for expected_keys in ("data", "images", "target", "DESCR"):
18 |         assert expected_keys in data.keys()
19 | 
20 |     assert data.data.shape == (400, 4096)
21 |     assert data.images.shape == (400, 64, 64)
22 |     assert data.target.shape == (400,)
23 |     assert_array_equal(np.unique(np.sort(data.target)), np.arange(40))
24 |     assert data.DESCR.startswith(".. _olivetti_faces_dataset:")
25 | 
26 |     # test the return_X_y option
27 |     check_return_X_y(data, fetch_olivetti_faces_fxt)
28 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include *.rst
 2 | recursive-include doc *
 3 | recursive-include examples *
 4 | recursive-include sklearn *.c *.h *.pyx *.pxd *.pxi *.tp
 5 | recursive-include sklearn/datasets *.csv *.csv.gz *.rst *.jpg *.txt *.arff.gz *.json.gz
 6 | include COPYING
 7 | include README.rst
 8 | include pyproject.toml
 9 | include sklearn/externals/README
10 | include sklearn/svm/src/liblinear/COPYRIGHT
11 | include sklearn/svm/src/libsvm/LIBSVM_CHANGES
12 | include conftest.py
13 | include Makefile
14 | include MANIFEST.in
15 | include .coveragerc
16 | 
17 | # exclude from sdist
18 | recursive-exclude asv_benchmarks *
19 | recursive-exclude benchmarks *
20 | recursive-exclude build_tools *
21 | recursive-exclude maint_tools *
22 | recursive-exclude benchmarks *
23 | recursive-exclude .binder *
24 | recursive-exclude .circleci *
25 | exclude .codecov.yml
26 | exclude .git-blame-ignore-revs
27 | exclude .mailmap
28 | exclude .pre-commit-config.yaml
29 | exclude azure-pipelines.yml
30 | exclude lgtm.yml
31 | exclude CODE_OF_CONDUCT.md
32 | exclude CONTRIBUTING.md
33 | exclude PULL_REQUEST_TEMPLATE.md
34 | 


--------------------------------------------------------------------------------
/sklearn/utils/tests/test_parallel.py:
--------------------------------------------------------------------------------
 1 | from distutils.version import LooseVersion
 2 | 
 3 | import pytest
 4 | from joblib import Parallel
 5 | import joblib
 6 | 
 7 | from numpy.testing import assert_array_equal
 8 | 
 9 | from sklearn._config import config_context, get_config
10 | from sklearn.utils.fixes import delayed
11 | 
12 | 
13 | def get_working_memory():
14 |     return get_config()["working_memory"]
15 | 
16 | 
17 | @pytest.mark.parametrize("n_jobs", [1, 2])
18 | @pytest.mark.parametrize("backend", ["loky", "threading", "multiprocessing"])
19 | def test_configuration_passes_through_to_joblib(n_jobs, backend):
20 |     # Tests that the global configuration is passed to joblib jobs
21 | 
22 |     if joblib.__version__ < LooseVersion("0.12") and backend == "loky":
23 |         pytest.skip("loky backend does not exist in joblib <0.12")
24 | 
25 |     with config_context(working_memory=123):
26 |         results = Parallel(n_jobs=n_jobs, backend=backend)(
27 |             delayed(get_working_memory)() for _ in range(2)
28 |         )
29 | 
30 |     assert_array_equal(results, [123] * 2)
31 | 


--------------------------------------------------------------------------------
/examples/feature_selection/plot_rfe_digits.py:
--------------------------------------------------------------------------------
 1 | """
 2 | =============================
 3 | Recursive feature elimination
 4 | =============================
 5 | 
 6 | A recursive feature elimination example showing the relevance of pixels in
 7 | a digit classification task.
 8 | 
 9 | .. note::
10 | 
11 |     See also :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`
12 | 
13 | """  # noqa: E501
14 | 
15 | from sklearn.svm import SVC
16 | from sklearn.datasets import load_digits
17 | from sklearn.feature_selection import RFE
18 | import matplotlib.pyplot as plt
19 | 
20 | # Load the digits dataset
21 | digits = load_digits()
22 | X = digits.images.reshape((len(digits.images), -1))
23 | y = digits.target
24 | 
25 | # Create the RFE object and rank each pixel
26 | svc = SVC(kernel="linear", C=1)
27 | rfe = RFE(estimator=svc, n_features_to_select=1, step=1)
28 | rfe.fit(X, y)
29 | ranking = rfe.ranking_.reshape(digits.images[0].shape)
30 | 
31 | # Plot pixel ranking
32 | plt.matshow(ranking, cmap=plt.cm.Blues)
33 | plt.colorbar()
34 | plt.title("Ranking of pixels with RFE")
35 | plt.show()
36 | 


--------------------------------------------------------------------------------
/doc/datasets/real_world.rst:
--------------------------------------------------------------------------------
 1 | .. Places parent toc into the sidebar
 2 | 
 3 | :parenttoc: True
 4 | 
 5 | .. _real_world_datasets:
 6 | 
 7 | Real world datasets
 8 | ===================
 9 | 
10 | .. currentmodule:: sklearn.datasets
11 | 
12 | scikit-learn provides tools to load larger datasets, downloading them if
13 | necessary.
14 | 
15 | They can be loaded using the following functions:
16 | 
17 | .. autosummary::
18 | 
19 |    fetch_olivetti_faces
20 |    fetch_20newsgroups
21 |    fetch_20newsgroups_vectorized
22 |    fetch_lfw_people
23 |    fetch_lfw_pairs
24 |    fetch_covtype
25 |    fetch_rcv1
26 |    fetch_kddcup99
27 |    fetch_california_housing
28 | 
29 | .. include:: ../../sklearn/datasets/descr/olivetti_faces.rst
30 | 
31 | .. include:: ../../sklearn/datasets/descr/twenty_newsgroups.rst
32 | 
33 | .. include:: ../../sklearn/datasets/descr/lfw.rst
34 | 
35 | .. include:: ../../sklearn/datasets/descr/covtype.rst
36 | 
37 | .. include:: ../../sklearn/datasets/descr/rcv1.rst
38 | 
39 | .. include:: ../../sklearn/datasets/descr/kddcup99.rst
40 | 
41 | .. include:: ../../sklearn/datasets/descr/california_housing.rst
42 | 


--------------------------------------------------------------------------------
/examples/exercises/plot_digits_classification_exercise.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ================================
 3 | Digits Classification Exercise
 4 | ================================
 5 | 
 6 | A tutorial exercise regarding the use of classification techniques on
 7 | the Digits dataset.
 8 | 
 9 | This exercise is used in the :ref:`clf_tut` part of the
10 | :ref:`supervised_learning_tut` section of the
11 | :ref:`stat_learn_tut_index`.
12 | 
13 | """
14 | 
15 | from sklearn import datasets, neighbors, linear_model
16 | 
17 | X_digits, y_digits = datasets.load_digits(return_X_y=True)
18 | X_digits = X_digits / X_digits.max()
19 | 
20 | n_samples = len(X_digits)
21 | 
22 | X_train = X_digits[: int(0.9 * n_samples)]
23 | y_train = y_digits[: int(0.9 * n_samples)]
24 | X_test = X_digits[int(0.9 * n_samples) :]
25 | y_test = y_digits[int(0.9 * n_samples) :]
26 | 
27 | knn = neighbors.KNeighborsClassifier()
28 | logistic = linear_model.LogisticRegression(max_iter=1000)
29 | 
30 | print("KNN score: %f" % knn.fit(X_train, y_train).score(X_test, y_test))
31 | print(
32 |     "LogisticRegression score: %f"
33 |     % logistic.fit(X_train, y_train).score(X_test, y_test)
34 | )
35 | 


--------------------------------------------------------------------------------
/sklearn/mixture/tests/test_mixture.py:
--------------------------------------------------------------------------------
 1 | # Author: Guillaume Lemaitre <g.lemaitre58@gmail.com>
 2 | # License: BSD 3 clause
 3 | 
 4 | import pytest
 5 | import numpy as np
 6 | 
 7 | from sklearn.mixture import GaussianMixture
 8 | from sklearn.mixture import BayesianGaussianMixture
 9 | 
10 | 
11 | @pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()])
12 | def test_gaussian_mixture_n_iter(estimator):
13 |     # check that n_iter_ is the number of iterations performed.
14 |     rng = np.random.RandomState(0)
15 |     X = rng.rand(10, 5)
16 |     max_iter = 1
17 |     estimator.set_params(max_iter=max_iter)
18 |     estimator.fit(X)
19 |     assert estimator.n_iter_ == max_iter
20 | 
21 | 
22 | @pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()])
23 | def test_mixture_n_components_greater_than_n_samples_error(estimator):
24 |     """Check error when n_components > n_samples"""
25 |     rng = np.random.RandomState(0)
26 |     X = rng.rand(10, 5)
27 |     estimator.set_params(n_components=12)
28 | 
29 |     msg = "Expected n_samples >= n_components"
30 |     with pytest.raises(ValueError, match=msg):
31 |         estimator.fit(X)
32 | 


--------------------------------------------------------------------------------
/examples/miscellaneous/plot_changed_only_pprint_parameter.py:
--------------------------------------------------------------------------------
 1 | """
 2 | =================================
 3 | Compact estimator representations
 4 | =================================
 5 | 
 6 | This example illustrates the use of the print_changed_only global parameter.
 7 | 
 8 | Setting print_changed_only to True will alter the representation of
 9 | estimators to only show the parameters that have been set to non-default
10 | values. This can be used to have more compact representations.
11 | 
12 | """
13 | 
14 | from sklearn.linear_model import LogisticRegression
15 | from sklearn import set_config
16 | 
17 | 
18 | lr = LogisticRegression(penalty="l1")
19 | print("Default representation:")
20 | print(lr)
21 | # LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
22 | #                    intercept_scaling=1, l1_ratio=None, max_iter=100,
23 | #                    multi_class='auto', n_jobs=None, penalty='l1',
24 | #                    random_state=None, solver='warn', tol=0.0001, verbose=0,
25 | #                    warm_start=False)
26 | 
27 | set_config(print_changed_only=True)
28 | print("\nWith changed_only option:")
29 | print(lr)
30 | # LogisticRegression(penalty='l1')
31 | 


--------------------------------------------------------------------------------
/doc/tutorial/text_analytics/solutions/generate_skeletons.py:
--------------------------------------------------------------------------------
 1 | """Generate skeletons from the example code"""
 2 | import os
 3 | 
 4 | exercise_dir = os.path.dirname(__file__)
 5 | if exercise_dir == '':
 6 |     exercise_dir = '.'
 7 | 
 8 | skeleton_dir = os.path.abspath(os.path.join(exercise_dir, '..', 'skeletons'))
 9 | if not os.path.exists(skeleton_dir):
10 |     os.makedirs(skeleton_dir)
11 | 
12 | solutions = os.listdir(exercise_dir)
13 | 
14 | for f in solutions:
15 |     if not f.endswith('.py'):
16 |         continue
17 | 
18 |     if f == os.path.basename(__file__):
19 |         continue
20 | 
21 |     print("Generating skeleton for %s" % f)
22 | 
23 |     input_file = open(os.path.join(exercise_dir, f))
24 |     output_file = open(os.path.join(skeleton_dir, f), 'w')
25 | 
26 |     in_exercise_region = False
27 | 
28 |     for line in input_file:
29 |         linestrip = line.strip()
30 |         if len(linestrip) == 0:
31 |             in_exercise_region = False
32 |         elif linestrip.startswith('# TASK:'):
33 |             in_exercise_region = True
34 | 
35 |         if not in_exercise_region or linestrip.startswith('#'):
36 |             output_file.write(line)
37 | 
38 |     output_file.close()
39 | 


--------------------------------------------------------------------------------
/examples/linear_model/plot_lasso_lars.py:
--------------------------------------------------------------------------------
 1 | """
 2 | =====================
 3 | Lasso path using LARS
 4 | =====================
 5 | 
 6 | Computes Lasso Path along the regularization parameter using the LARS
 7 | algorithm on the diabetes dataset. Each color represents a different
 8 | feature of the coefficient vector, and this is displayed as a function
 9 | of the regularization parameter.
10 | 
11 | """
12 | 
13 | # Author: Fabian Pedregosa <fabian.pedregosa@inria.fr>
14 | #         Alexandre Gramfort <alexandre.gramfort@inria.fr>
15 | # License: BSD 3 clause
16 | 
17 | import numpy as np
18 | import matplotlib.pyplot as plt
19 | 
20 | from sklearn import linear_model
21 | from sklearn import datasets
22 | 
23 | X, y = datasets.load_diabetes(return_X_y=True)
24 | 
25 | print("Computing regularization path using the LARS ...")
26 | _, _, coefs = linear_model.lars_path(X, y, method="lasso", verbose=True)
27 | 
28 | xx = np.sum(np.abs(coefs.T), axis=1)
29 | xx /= xx[-1]
30 | 
31 | plt.plot(xx, coefs.T)
32 | ymin, ymax = plt.ylim()
33 | plt.vlines(xx, ymin, ymax, linestyle="dashed")
34 | plt.xlabel("|coef| / max|coef|")
35 | plt.ylabel("Coefficients")
36 | plt.title("LASSO Path")
37 | plt.axis("tight")
38 | plt.show()
39 | 


--------------------------------------------------------------------------------
/doc/inspection.rst:
--------------------------------------------------------------------------------
 1 | .. Places parent toc into the sidebar
 2 | 
 3 | :parenttoc: True
 4 | 
 5 | .. include:: includes/big_toc_css.rst
 6 | 
 7 | .. _inspection:
 8 | 
 9 | Inspection
10 | ----------
11 | 
12 | Predictive performance is often the main goal of developing machine learning
13 | models. Yet summarising performance with an evaluation metric is often
14 | insufficient: it assumes that the evaluation metric and test dataset
15 | perfectly reflect the target domain, which is rarely true. In certain domains,
16 | a model needs a certain level of interpretability before it can be deployed.
17 | A model that is exhibiting performance issues needs to be debugged for one to 
18 | understand the model's underlying issue. The 
19 | :mod:`sklearn.inspection` module provides tools to help understand the 
20 | predictions from a model and what affects them. This can be used to 
21 | evaluate assumptions and biases of a model, design a better model, or
22 | to diagnose issues with model performance.
23 | 
24 | .. topic:: Examples:
25 | 
26 |    * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py`
27 | 
28 | .. toctree::
29 | 
30 |     modules/partial_dependence
31 |     modules/permutation_importance
32 | 


--------------------------------------------------------------------------------
/sklearn/neighbors/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | 
 4 | def configuration(parent_package="", top_path=None):
 5 |     import numpy
 6 |     from numpy.distutils.misc_util import Configuration
 7 | 
 8 |     config = Configuration("neighbors", parent_package, top_path)
 9 |     libraries = []
10 |     if os.name == "posix":
11 |         libraries.append("m")
12 | 
13 |     config.add_extension(
14 |         "_ball_tree",
15 |         sources=["_ball_tree.pyx"],
16 |         include_dirs=[numpy.get_include()],
17 |         libraries=libraries,
18 |     )
19 | 
20 |     config.add_extension(
21 |         "_kd_tree",
22 |         sources=["_kd_tree.pyx"],
23 |         include_dirs=[numpy.get_include()],
24 |         libraries=libraries,
25 |     )
26 | 
27 |     config.add_extension(
28 |         "_partition_nodes",
29 |         sources=["_partition_nodes.pyx"],
30 |         include_dirs=[numpy.get_include()],
31 |         language="c++",
32 |         libraries=libraries,
33 |     )
34 | 
35 |     config.add_extension(
36 |         "_quad_tree",
37 |         sources=["_quad_tree.pyx"],
38 |         include_dirs=[numpy.get_include()],
39 |         libraries=libraries,
40 |     )
41 | 
42 |     config.add_subpackage("tests")
43 | 
44 |     return config
45 | 


--------------------------------------------------------------------------------
/doc/datasets/toy_dataset.rst:
--------------------------------------------------------------------------------
 1 | .. Places parent toc into the sidebar
 2 | 
 3 | :parenttoc: True
 4 | 
 5 | .. _toy_datasets:
 6 | 
 7 | Toy datasets
 8 | ============
 9 | 
10 | .. currentmodule:: sklearn.datasets
11 | 
12 | scikit-learn comes with a few small standard datasets that do not require
13 | downloading any file from an external website.
14 | 
15 | They can be loaded using the following functions:
16 | 
17 | .. autosummary::
18 | 
19 |    load_boston
20 |    load_iris
21 |    load_diabetes
22 |    load_digits
23 |    load_linnerud
24 |    load_wine
25 |    load_breast_cancer
26 | 
27 | These datasets are useful to quickly illustrate the behavior of the
28 | various algorithms implemented in scikit-learn. They are however often too
29 | small to be representative of real world machine learning tasks.
30 | 
31 | .. include:: ../../sklearn/datasets/descr/boston_house_prices.rst
32 | 
33 | .. include:: ../../sklearn/datasets/descr/iris.rst
34 | 
35 | .. include:: ../../sklearn/datasets/descr/diabetes.rst
36 | 
37 | .. include:: ../../sklearn/datasets/descr/digits.rst
38 | 
39 | .. include:: ../../sklearn/datasets/descr/linnerud.rst
40 | 
41 | .. include:: ../../sklearn/datasets/descr/wine_data.rst
42 | 
43 | .. include:: ../../sklearn/datasets/descr/breast_cancer.rst
44 | 


--------------------------------------------------------------------------------
/sklearn/utils/_arpack.py:
--------------------------------------------------------------------------------
 1 | from .validation import check_random_state
 2 | 
 3 | 
 4 | def _init_arpack_v0(size, random_state):
 5 |     """Initialize the starting vector for iteration in ARPACK functions.
 6 | 
 7 |     Initialize a ndarray with values sampled from the uniform distribution on
 8 |     [-1, 1]. This initialization model has been chosen to be consistent with
 9 |     the ARPACK one as another initialization can lead to convergence issues.
10 | 
11 |     Parameters
12 |     ----------
13 |     size : int
14 |         The size of the eigenvalue vector to be initialized.
15 | 
16 |     random_state : int, RandomState instance or None
17 |         The seed of the pseudo random number generator used to generate a
18 |         uniform distribution. If int, random_state is the seed used by the
19 |         random number generator; If RandomState instance, random_state is the
20 |         random number generator; If None, the random number generator is the
21 |         RandomState instance used by `np.random`.
22 | 
23 |     Returns
24 |     -------
25 |     v0 : ndarray of shape (size,)
26 |         The initialized vector.
27 |     """
28 |     random_state = check_random_state(random_state)
29 |     v0 = random_state.uniform(-1, 1, size)
30 |     return v0
31 | 


--------------------------------------------------------------------------------
/examples/svm/plot_svm_nonlinear.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ==============
 3 | Non-linear SVM
 4 | ==============
 5 | 
 6 | Perform binary classification using non-linear SVC
 7 | with RBF kernel. The target to predict is an XOR of the
 8 | inputs.
 9 | 
10 | The color map illustrates the decision function learned by the SVC.
11 | 
12 | """
13 | 
14 | import numpy as np
15 | import matplotlib.pyplot as plt
16 | from sklearn import svm
17 | 
18 | xx, yy = np.meshgrid(np.linspace(-3, 3, 500), np.linspace(-3, 3, 500))
19 | np.random.seed(0)
20 | X = np.random.randn(300, 2)
21 | Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)
22 | 
23 | # fit the model
24 | clf = svm.NuSVC(gamma="auto")
25 | clf.fit(X, Y)
26 | 
27 | # plot the decision function for each datapoint on the grid
28 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
29 | Z = Z.reshape(xx.shape)
30 | 
31 | plt.imshow(
32 |     Z,
33 |     interpolation="nearest",
34 |     extent=(xx.min(), xx.max(), yy.min(), yy.max()),
35 |     aspect="auto",
36 |     origin="lower",
37 |     cmap=plt.cm.PuOr_r,
38 | )
39 | contours = plt.contour(xx, yy, Z, levels=[0], linewidths=2, linestyles="dashed")
40 | plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired, edgecolors="k")
41 | plt.xticks(())
42 | plt.yticks(())
43 | plt.axis([-3, 3, -3, 3])
44 | plt.show()
45 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | # Minimum requirements for the build system to execute.
 3 | requires = [
 4 |     "setuptools",
 5 |     "wheel",
 6 |     "Cython>=0.28.5",
 7 | 
 8 |     # use oldest-supported-numpy which provides the oldest numpy version with
 9 |     # wheels on PyPI
10 |     #
11 |     # see: https://github.com/scipy/oldest-supported-numpy/blob/master/setup.cfg
12 |     "oldest-supported-numpy; python_version!='3.7' or platform_machine=='aarch64' or platform_system=='AIX' or platform_python_implementation == 'PyPy'",
13 | 
14 |     # Override oldest-supported-numpy setting because pandas 0.25.0 requires 1.14.6
15 |     "numpy==1.14.6; python_version=='3.7' and platform_machine!='aarch64' and platform_system!='AIX' and platform_python_implementation != 'PyPy'",
16 | 
17 |     "scipy>=1.1.0",
18 | ]
19 | 
20 | [tool.black]
21 | line-length = 88
22 | target_version = ['py37', 'py38', 'py39']
23 | experimental_string_processing = true
24 | exclude = '''
25 | /(
26 |     \.eggs         # exclude a few common directories in the
27 |   | \.git          # root of the project
28 |   | \.mypy_cache
29 |   | \.vscode
30 |   | build
31 |   | dist
32 |   | doc/tutorial
33 |   | doc/_build
34 |   | doc/auto_examples
35 |   | sklearn/externals
36 |   | asv_benchmarks/env
37 | )/
38 | '''
39 | 


--------------------------------------------------------------------------------
/sklearn/utils/tests/test_show_versions.py:
--------------------------------------------------------------------------------
 1 | from sklearn.utils.fixes import threadpool_info
 2 | from sklearn.utils._show_versions import _get_sys_info
 3 | from sklearn.utils._show_versions import _get_deps_info
 4 | from sklearn.utils._show_versions import show_versions
 5 | from sklearn.utils._testing import ignore_warnings
 6 | 
 7 | 
 8 | def test_get_sys_info():
 9 |     sys_info = _get_sys_info()
10 | 
11 |     assert "python" in sys_info
12 |     assert "executable" in sys_info
13 |     assert "machine" in sys_info
14 | 
15 | 
16 | def test_get_deps_info():
17 |     with ignore_warnings():
18 |         deps_info = _get_deps_info()
19 | 
20 |     assert "pip" in deps_info
21 |     assert "setuptools" in deps_info
22 |     assert "sklearn" in deps_info
23 |     assert "numpy" in deps_info
24 |     assert "scipy" in deps_info
25 |     assert "Cython" in deps_info
26 |     assert "pandas" in deps_info
27 |     assert "matplotlib" in deps_info
28 |     assert "joblib" in deps_info
29 | 
30 | 
31 | def test_show_versions(capsys):
32 |     with ignore_warnings():
33 |         show_versions()
34 |         out, err = capsys.readouterr()
35 | 
36 |     assert "python" in out
37 |     assert "numpy" in out
38 | 
39 |     info = threadpool_info()
40 |     if info:
41 |         assert "threadpoolctl info:" in out
42 | 


--------------------------------------------------------------------------------
/sklearn/decomposition/_cdnmf_fast.pyx:
--------------------------------------------------------------------------------
 1 | # Author: Mathieu Blondel, Tom Dupre la Tour
 2 | # License: BSD 3 clause
 3 | 
 4 | from cython cimport floating
 5 | from libc.math cimport fabs
 6 | 
 7 | 
 8 | def _update_cdnmf_fast(floating[:, ::1] W, floating[:, :] HHt,
 9 |                        floating[:, :] XHt, Py_ssize_t[::1] permutation):
10 |     cdef:
11 |         floating violation = 0
12 |         Py_ssize_t n_components = W.shape[1]
13 |         Py_ssize_t n_samples = W.shape[0]  # n_features for H update
14 |         floating grad, pg, hess
15 |         Py_ssize_t i, r, s, t
16 | 
17 |     with nogil:
18 |         for s in range(n_components):
19 |             t = permutation[s]
20 | 
21 |             for i in range(n_samples):
22 |                 # gradient = GW[t, i] where GW = np.dot(W, HHt) - XHt
23 |                 grad = -XHt[i, t]
24 | 
25 |                 for r in range(n_components):
26 |                     grad += HHt[t, r] * W[i, r]
27 | 
28 |                 # projected gradient
29 |                 pg = min(0., grad) if W[i, t] == 0 else grad
30 |                 violation += fabs(pg)
31 | 
32 |                 # Hessian
33 |                 hess = HHt[t, t]
34 | 
35 |                 if hess != 0:
36 |                     W[i, t] = max(W[i, t] - grad / hess, 0.)
37 |                 
38 |     return violation
39 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | <!--
 2 | Thanks for contributing a pull request! Please ensure you have taken a look at
 3 | the contribution guidelines: https://github.com/scikit-learn/scikit-learn/blob/main/CONTRIBUTING.md
 4 | -->
 5 | 
 6 | #### Reference Issues/PRs
 7 | <!--
 8 | Example: Fixes #1234. See also #3456.
 9 | Please use keywords (e.g., Fixes) to create a link to the issues or pull requests
10 | you resolved, so that they will automatically be closed when your pull request
11 | is merged. See https://github.com/blog/1506-closing-issues-via-pull-requests
12 | -->
13 | 
14 | 
15 | #### What does this implement/fix? Explain your changes.
16 | 
17 | 
18 | #### Any other comments?
19 | 
20 | 
21 | <!--
22 | Please be aware that we are a loose team of volunteers so patience is
23 | necessary; assistance handling other issues is very welcome. We value
24 | all user contributions, no matter how minor they are. If we are slow to
25 | review, either the pull request needs some benchmarking, tinkering,
26 | convincing, etc. or more likely the reviewers are simply busy. In either
27 | case, we ask for your understanding during the review process.
28 | For more information, see our FAQ on this topic:
29 | http://scikit-learn.org/dev/faq.html#why-is-my-pull-request-not-getting-any-attention.
30 | 
31 | Thanks for contributing!
32 | -->
33 | 


--------------------------------------------------------------------------------
/doc/includes/bigger_toc_css.rst:
--------------------------------------------------------------------------------
 1 | ..  
 2 |     File to ..include in a document with a very big table of contents, to
 3 |     give it 'style'
 4 | 
 5 | .. raw:: html
 6 | 
 7 |   <style type="text/css">
 8 |     div.bodywrapper blockquote {
 9 |         margin: 0 ;
10 |     }
11 | 
12 |     div.toctree-wrapper ul {
13 | 	margin: 0 ;
14 | 	padding-left: 0px ;
15 |     }
16 | 
17 |     li.toctree-l1 {
18 |         padding: 0 ;
19 |         list-style-type: none;
20 |         font-size: 150% ;
21 | 	font-family: Arial, sans-serif;
22 | 	background-color: #BED4EB;
23 | 	font-weight: normal;
24 | 	color: #212224;
25 | 	margin-left : 0;
26 | 	font-weight: bold;
27 |         }
28 | 
29 |     li.toctree-l1 a {
30 |         padding: 0 0 0 10px ;
31 |     }
32 |  
33 |     li.toctree-l2 {
34 |         padding: 0.25em 0 0.25em 0 ;
35 |         list-style-type: none;
36 | 	background-color: #FFFFFF;
37 |         font-size: 90% ;
38 | 	font-weight: bold;
39 |         }
40 | 
41 |     li.toctree-l2 ul {
42 | 	padding-left: 40px ;
43 |     }
44 | 
45 |     li.toctree-l3 {
46 |         font-size: 70% ;
47 |         list-style-type: none;
48 | 	font-weight: normal;
49 |         }
50 | 
51 |     li.toctree-l4 {
52 |         font-size: 85% ;
53 |         list-style-type: none;
54 | 	font-weight: normal;
55 |         }
56 |  
57 |   </style>
58 | 
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/sklearn/covariance/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`sklearn.covariance` module includes methods and algorithms to
 3 | robustly estimate the covariance of features given a set of points. The
 4 | precision matrix defined as the inverse of the covariance is also estimated.
 5 | Covariance estimation is closely related to the theory of Gaussian Graphical
 6 | Models.
 7 | """
 8 | 
 9 | from ._empirical_covariance import (
10 |     empirical_covariance,
11 |     EmpiricalCovariance,
12 |     log_likelihood,
13 | )
14 | from ._shrunk_covariance import (
15 |     shrunk_covariance,
16 |     ShrunkCovariance,
17 |     ledoit_wolf,
18 |     ledoit_wolf_shrinkage,
19 |     LedoitWolf,
20 |     oas,
21 |     OAS,
22 | )
23 | from ._robust_covariance import fast_mcd, MinCovDet
24 | from ._graph_lasso import graphical_lasso, GraphicalLasso, GraphicalLassoCV
25 | from ._elliptic_envelope import EllipticEnvelope
26 | 
27 | 
28 | __all__ = [
29 |     "EllipticEnvelope",
30 |     "EmpiricalCovariance",
31 |     "GraphicalLasso",
32 |     "GraphicalLassoCV",
33 |     "LedoitWolf",
34 |     "MinCovDet",
35 |     "OAS",
36 |     "ShrunkCovariance",
37 |     "empirical_covariance",
38 |     "fast_mcd",
39 |     "graphical_lasso",
40 |     "ledoit_wolf",
41 |     "ledoit_wolf_shrinkage",
42 |     "log_likelihood",
43 |     "oas",
44 |     "shrunk_covariance",
45 | ]
46 | 


--------------------------------------------------------------------------------
/sklearn/tests/test_build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pytest
 3 | import textwrap
 4 | 
 5 | from sklearn import __version__
 6 | from sklearn.utils._openmp_helpers import _openmp_parallelism_enabled
 7 | 
 8 | 
 9 | def test_openmp_parallelism_enabled():
10 |     # Check that sklearn is built with OpenMP-based parallelism enabled.
11 |     # This test can be skipped by setting the environment variable
12 |     # ``SKLEARN_SKIP_OPENMP_TEST``.
13 |     if os.getenv("SKLEARN_SKIP_OPENMP_TEST"):
14 |         pytest.skip("test explicitly skipped (SKLEARN_SKIP_OPENMP_TEST)")
15 | 
16 |     base_url = "dev" if __version__.endswith(".dev0") else "stable"
17 |     err_msg = textwrap.dedent(
18 |         """
19 |         This test fails because scikit-learn has been built without OpenMP.
20 |         This is not recommended since some estimators will run in sequential
21 |         mode instead of leveraging thread-based parallelism.
22 | 
23 |         You can find instructions to build scikit-learn with OpenMP at this
24 |         address:
25 | 
26 |             https://scikit-learn.org/{}/developers/advanced_installation.html
27 | 
28 |         You can skip this test by setting the environment variable
29 |         SKLEARN_SKIP_OPENMP_TEST to any value.
30 |         """
31 |     ).format(base_url)
32 | 
33 |     assert _openmp_parallelism_enabled(), err_msg
34 | 


--------------------------------------------------------------------------------
/asv_benchmarks/benchmarks/neighbors.py:
--------------------------------------------------------------------------------
 1 | from sklearn.neighbors import KNeighborsClassifier
 2 | 
 3 | from .common import Benchmark, Estimator, Predictor
 4 | from .datasets import _20newsgroups_lowdim_dataset
 5 | from .utils import make_gen_classif_scorers
 6 | 
 7 | 
 8 | class KNeighborsClassifierBenchmark(Predictor, Estimator, Benchmark):
 9 |     """
10 |     Benchmarks for KNeighborsClassifier.
11 |     """
12 | 
13 |     param_names = ["algorithm", "dimension", "n_jobs"]
14 |     params = (["brute", "kd_tree", "ball_tree"], ["low", "high"], Benchmark.n_jobs_vals)
15 | 
16 |     def setup_cache(self):
17 |         super().setup_cache()
18 | 
19 |     def make_data(self, params):
20 |         algorithm, dimension, n_jobs = params
21 | 
22 |         if Benchmark.data_size == "large":
23 |             n_components = 40 if dimension == "low" else 200
24 |         else:
25 |             n_components = 10 if dimension == "low" else 50
26 | 
27 |         data = _20newsgroups_lowdim_dataset(n_components=n_components)
28 | 
29 |         return data
30 | 
31 |     def make_estimator(self, params):
32 |         algorithm, dimension, n_jobs = params
33 | 
34 |         estimator = KNeighborsClassifier(algorithm=algorithm, n_jobs=n_jobs)
35 | 
36 |         return estimator
37 | 
38 |     def make_scorers(self):
39 |         make_gen_classif_scorers(self)
40 | 


--------------------------------------------------------------------------------
/sklearn/datasets/descr/covtype.rst:
--------------------------------------------------------------------------------
 1 | .. _covtype_dataset:
 2 | 
 3 | Forest covertypes
 4 | -----------------
 5 | 
 6 | The samples in this dataset correspond to 30×30m patches of forest in the US,
 7 | collected for the task of predicting each patch's cover type,
 8 | i.e. the dominant species of tree.
 9 | There are seven covertypes, making this a multiclass classification problem.
10 | Each sample has 54 features, described on the
11 | `dataset's homepage <https://archive.ics.uci.edu/ml/datasets/Covertype>`__.
12 | Some of the features are boolean indicators,
13 | while others are discrete or continuous measurements.
14 | 
15 | **Data Set Characteristics:**
16 | 
17 |     =================   ============
18 |     Classes                        7
19 |     Samples total             581012
20 |     Dimensionality                54
21 |     Features                     int
22 |     =================   ============
23 | 
24 | :func:`sklearn.datasets.fetch_covtype` will load the covertype dataset;
25 | it returns a dictionary-like 'Bunch' object
26 | with the feature matrix in the ``data`` member
27 | and the target values in ``target``. If optional argument 'as_frame' is
28 | set to 'True', it will return ``data`` and ``target`` as pandas
29 | data frame, and there will be an additional member ``frame`` as well.
30 | The dataset will be downloaded from the web if necessary.
31 | 


--------------------------------------------------------------------------------
/examples/exercises/plot_cv_digits.py:
--------------------------------------------------------------------------------
 1 | """
 2 | =============================================
 3 | Cross-validation on Digits Dataset Exercise
 4 | =============================================
 5 | 
 6 | A tutorial exercise using Cross-validation with an SVM on the Digits dataset.
 7 | 
 8 | This exercise is used in the :ref:`cv_generators_tut` part of the
 9 | :ref:`model_selection_tut` section of the :ref:`stat_learn_tut_index`.
10 | 
11 | """
12 | 
13 | import numpy as np
14 | from sklearn.model_selection import cross_val_score
15 | from sklearn import datasets, svm
16 | 
17 | X, y = datasets.load_digits(return_X_y=True)
18 | 
19 | svc = svm.SVC(kernel="linear")
20 | C_s = np.logspace(-10, 0, 10)
21 | 
22 | scores = list()
23 | scores_std = list()
24 | for C in C_s:
25 |     svc.C = C
26 |     this_scores = cross_val_score(svc, X, y, n_jobs=1)
27 |     scores.append(np.mean(this_scores))
28 |     scores_std.append(np.std(this_scores))
29 | 
30 | # Do the plotting
31 | import matplotlib.pyplot as plt
32 | 
33 | plt.figure()
34 | plt.semilogx(C_s, scores)
35 | plt.semilogx(C_s, np.array(scores) + np.array(scores_std), "b--")
36 | plt.semilogx(C_s, np.array(scores) - np.array(scores_std), "b--")
37 | locs, labels = plt.yticks()
38 | plt.yticks(locs, list(map(lambda x: "%g" % x, locs)))
39 | plt.ylabel("CV score")
40 | plt.xlabel("Parameter C")
41 | plt.ylim(0, 1.1)
42 | plt.show()
43 | 


--------------------------------------------------------------------------------
/examples/cluster/plot_kmeans_plusplus.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ===========================================================
 3 | An example of K-Means++ initialization
 4 | ===========================================================
 5 | 
 6 | An example to show the output of the :func:`sklearn.cluster.kmeans_plusplus`
 7 | function for generating initial seeds for clustering.
 8 | 
 9 | K-Means++ is used as the default initialization for :ref:`k_means`.
10 | 
11 | """
12 | 
13 | from sklearn.cluster import kmeans_plusplus
14 | from sklearn.datasets import make_blobs
15 | import matplotlib.pyplot as plt
16 | 
17 | # Generate sample data
18 | n_samples = 4000
19 | n_components = 4
20 | 
21 | X, y_true = make_blobs(
22 |     n_samples=n_samples, centers=n_components, cluster_std=0.60, random_state=0
23 | )
24 | X = X[:, ::-1]
25 | 
26 | # Calculate seeds from kmeans++
27 | centers_init, indices = kmeans_plusplus(X, n_clusters=4, random_state=0)
28 | 
29 | # Plot init seeds along side sample data
30 | plt.figure(1)
31 | colors = ["#4EACC5", "#FF9C34", "#4E9A06", "m"]
32 | 
33 | for k, col in enumerate(colors):
34 |     cluster_data = y_true == k
35 |     plt.scatter(X[cluster_data, 0], X[cluster_data, 1], c=col, marker=".", s=10)
36 | 
37 | plt.scatter(centers_init[:, 0], centers_init[:, 1], c="b", s=50)
38 | plt.title("K-Means++ Initialization")
39 | plt.xticks([])
40 | plt.yticks([])
41 | plt.show()
42 | 


--------------------------------------------------------------------------------
/sklearn/utils/src/MurmurHash3.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public
 3 | // domain. The author hereby disclaims copyright to this source code.
 4 | 
 5 | #ifndef _MURMURHASH3_H_
 6 | #define _MURMURHASH3_H_
 7 | 
 8 | //-----------------------------------------------------------------------------
 9 | // Platform-specific functions and macros
10 | 
11 | // Microsoft Visual Studio
12 | 
13 | #if defined(_MSC_VER)
14 | 
15 | typedef unsigned char uint8_t;
16 | typedef unsigned long uint32_t;
17 | typedef unsigned __int64 uint64_t;
18 | 
19 | // Other compilers
20 | 
21 | #else	// defined(_MSC_VER)
22 | 
23 | #include <stdint.h>
24 | 
25 | #endif // !defined(_MSC_VER)
26 | 
27 | //-----------------------------------------------------------------------------
28 | #ifdef __cplusplus
29 | extern "C" {
30 | #endif
31 | 
32 | 
33 | void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
34 | 
35 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
36 | 
37 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
38 | 
39 | #ifdef __cplusplus
40 | }
41 | #endif
42 | 
43 | //-----------------------------------------------------------------------------
44 | 
45 | #endif // _MURMURHASH3_H_
46 | 


--------------------------------------------------------------------------------
/sklearn/experimental/enable_halving_search_cv.py:
--------------------------------------------------------------------------------
 1 | """Enables Successive Halving search-estimators
 2 | 
 3 | The API and results of these estimators might change without any deprecation
 4 | cycle.
 5 | 
 6 | Importing this file dynamically sets the
 7 | :class:`~sklearn.model_selection.HalvingRandomSearchCV` and
 8 | :class:`~sklearn.model_selection.HalvingGridSearchCV` as attributes of the
 9 | `model_selection` module::
10 | 
11 |     >>> # explicitly require this experimental feature
12 |     >>> from sklearn.experimental import enable_halving_search_cv # noqa
13 |     >>> # now you can import normally from model_selection
14 |     >>> from sklearn.model_selection import HalvingRandomSearchCV
15 |     >>> from sklearn.model_selection import HalvingGridSearchCV
16 | 
17 | 
18 | The ``# noqa`` comment can be removed: it just tells linters like
19 | flake8 to ignore the import, which appears as unused.
20 | """
21 | 
22 | from ..model_selection._search_successive_halving import (
23 |     HalvingRandomSearchCV,
24 |     HalvingGridSearchCV,
25 | )
26 | 
27 | from .. import model_selection
28 | 
29 | # use setattr to avoid mypy errors when monkeypatching
30 | setattr(model_selection, "HalvingRandomSearchCV", HalvingRandomSearchCV)
31 | setattr(model_selection, "HalvingGridSearchCV", HalvingGridSearchCV)
32 | 
33 | model_selection.__all__ += ["HalvingRandomSearchCV", "HalvingGridSearchCV"]
34 | 


--------------------------------------------------------------------------------
/maint_tools/sort_whats_new.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Sorts what's new entries with per-module headings.
 3 | # Pass what's new entries on stdin.
 4 | 
 5 | import sys
 6 | import re
 7 | from collections import defaultdict
 8 | 
 9 | LABEL_ORDER = ["MajorFeature", "Feature", "Enhancement", "Efficiency", "Fix", "API"]
10 | 
11 | 
12 | def entry_sort_key(s):
13 |     if s.startswith("- |"):
14 |         return LABEL_ORDER.index(s.split("|")[1])
15 |     else:
16 |         return -1
17 | 
18 | 
19 | # discard headings and other non-entry lines
20 | text = "".join(l for l in sys.stdin if l.startswith("- ") or l.startswith(" "))
21 | 
22 | bucketed = defaultdict(list)
23 | 
24 | for entry in re.split("\n(?=- )", text.strip()):
25 |     modules = re.findall(
26 |         r":(?:func|meth|mod|class):" r"`(?:[^<`]*<|~)?(?:sklearn.)?([a-z]\w+)", entry
27 |     )
28 |     modules = set(modules)
29 |     if len(modules) > 1:
30 |         key = "Multiple modules"
31 |     elif modules:
32 |         key = ":mod:`sklearn.%s`" % next(iter(modules))
33 |     else:
34 |         key = "Miscellaneous"
35 |     bucketed[key].append(entry)
36 |     entry = entry.strip() + "\n"
37 | 
38 | everything = []
39 | for key, bucket in sorted(bucketed.items()):
40 |     everything.append(key + "\n" + "." * len(key))
41 |     bucket.sort(key=entry_sort_key)
42 |     everything.extend(bucket)
43 | print("\n\n".join(everything))
44 | 


--------------------------------------------------------------------------------
/sklearn/linear_model/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy
 3 | 
 4 | from sklearn._build_utils import gen_from_templates
 5 | 
 6 | 
 7 | def configuration(parent_package="", top_path=None):
 8 |     from numpy.distutils.misc_util import Configuration
 9 | 
10 |     config = Configuration("linear_model", parent_package, top_path)
11 | 
12 |     libraries = []
13 |     if os.name == "posix":
14 |         libraries.append("m")
15 | 
16 |     config.add_extension(
17 |         "_cd_fast",
18 |         sources=["_cd_fast.pyx"],
19 |         include_dirs=numpy.get_include(),
20 |         libraries=libraries,
21 |     )
22 | 
23 |     config.add_extension(
24 |         "_sgd_fast",
25 |         sources=["_sgd_fast.pyx"],
26 |         include_dirs=numpy.get_include(),
27 |         libraries=libraries,
28 |     )
29 | 
30 |     # generate sag_fast from template
31 |     templates = ["sklearn/linear_model/_sag_fast.pyx.tp"]
32 |     gen_from_templates(templates)
33 | 
34 |     config.add_extension(
35 |         "_sag_fast", sources=["_sag_fast.pyx"], include_dirs=numpy.get_include()
36 |     )
37 | 
38 |     # add other directories
39 |     config.add_subpackage("tests")
40 |     config.add_subpackage("_glm")
41 |     config.add_subpackage("_glm/tests")
42 | 
43 |     return config
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     from numpy.distutils.core import setup
48 | 
49 |     setup(**configuration(top_path="").todict())
50 | 


--------------------------------------------------------------------------------
/sklearn/neighbors/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`sklearn.neighbors` module implements the k-nearest neighbors
 3 | algorithm.
 4 | """
 5 | 
 6 | from ._ball_tree import BallTree
 7 | from ._kd_tree import KDTree
 8 | from ._distance_metric import DistanceMetric
 9 | from ._graph import kneighbors_graph, radius_neighbors_graph
10 | from ._graph import KNeighborsTransformer, RadiusNeighborsTransformer
11 | from ._unsupervised import NearestNeighbors
12 | from ._classification import KNeighborsClassifier, RadiusNeighborsClassifier
13 | from ._regression import KNeighborsRegressor, RadiusNeighborsRegressor
14 | from ._nearest_centroid import NearestCentroid
15 | from ._kde import KernelDensity
16 | from ._lof import LocalOutlierFactor
17 | from ._nca import NeighborhoodComponentsAnalysis
18 | from ._base import VALID_METRICS, VALID_METRICS_SPARSE
19 | 
20 | __all__ = [
21 |     "BallTree",
22 |     "DistanceMetric",
23 |     "KDTree",
24 |     "KNeighborsClassifier",
25 |     "KNeighborsRegressor",
26 |     "KNeighborsTransformer",
27 |     "NearestCentroid",
28 |     "NearestNeighbors",
29 |     "RadiusNeighborsClassifier",
30 |     "RadiusNeighborsRegressor",
31 |     "RadiusNeighborsTransformer",
32 |     "kneighbors_graph",
33 |     "radius_neighbors_graph",
34 |     "KernelDensity",
35 |     "LocalOutlierFactor",
36 |     "NeighborhoodComponentsAnalysis",
37 |     "VALID_METRICS",
38 |     "VALID_METRICS_SPARSE",
39 | ]
40 | 


--------------------------------------------------------------------------------
/examples/linear_model/plot_sgd_separating_hyperplane.py:
--------------------------------------------------------------------------------
 1 | """
 2 | =========================================
 3 | SGD: Maximum margin separating hyperplane
 4 | =========================================
 5 | 
 6 | Plot the maximum margin separating hyperplane within a two-class
 7 | separable dataset using a linear Support Vector Machines classifier
 8 | trained using SGD.
 9 | 
10 | """
11 | 
12 | import numpy as np
13 | import matplotlib.pyplot as plt
14 | from sklearn.linear_model import SGDClassifier
15 | from sklearn.datasets import make_blobs
16 | 
17 | # we create 50 separable points
18 | X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)
19 | 
20 | # fit the model
21 | clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200)
22 | 
23 | clf.fit(X, Y)
24 | 
25 | # plot the line, the points, and the nearest vectors to the plane
26 | xx = np.linspace(-1, 5, 10)
27 | yy = np.linspace(-1, 5, 10)
28 | 
29 | X1, X2 = np.meshgrid(xx, yy)
30 | Z = np.empty(X1.shape)
31 | for (i, j), val in np.ndenumerate(X1):
32 |     x1 = val
33 |     x2 = X2[i, j]
34 |     p = clf.decision_function([[x1, x2]])
35 |     Z[i, j] = p[0]
36 | levels = [-1.0, 0.0, 1.0]
37 | linestyles = ["dashed", "solid", "dashed"]
38 | colors = "k"
39 | plt.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles)
40 | plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, edgecolor="black", s=20)
41 | 
42 | plt.axis("tight")
43 | plt.show()
44 | 


--------------------------------------------------------------------------------
/sklearn/tree/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy
 4 | from numpy.distutils.misc_util import Configuration
 5 | 
 6 | 
 7 | def configuration(parent_package="", top_path=None):
 8 |     config = Configuration("tree", parent_package, top_path)
 9 |     libraries = []
10 |     if os.name == "posix":
11 |         libraries.append("m")
12 |     config.add_extension(
13 |         "_tree",
14 |         sources=["_tree.pyx"],
15 |         include_dirs=[numpy.get_include()],
16 |         libraries=libraries,
17 |         extra_compile_args=["-O3"],
18 |     )
19 |     config.add_extension(
20 |         "_splitter",
21 |         sources=["_splitter.pyx"],
22 |         include_dirs=[numpy.get_include()],
23 |         libraries=libraries,
24 |         extra_compile_args=["-O3"],
25 |     )
26 |     config.add_extension(
27 |         "_criterion",
28 |         sources=["_criterion.pyx"],
29 |         include_dirs=[numpy.get_include()],
30 |         libraries=libraries,
31 |         extra_compile_args=["-O3"],
32 |     )
33 |     config.add_extension(
34 |         "_utils",
35 |         sources=["_utils.pyx"],
36 |         include_dirs=[numpy.get_include()],
37 |         libraries=libraries,
38 |         extra_compile_args=["-O3"],
39 |     )
40 | 
41 |     config.add_subpackage("tests")
42 | 
43 |     return config
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     from numpy.distutils.core import setup
48 | 
49 |     setup(**configuration().todict())
50 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *.so
 3 | *.pyd
 4 | *~
 5 | .#*
 6 | *.lprof
 7 | *.swp
 8 | *.swo
 9 | .DS_Store
10 | build
11 | sklearn/datasets/__config__.py
12 | sklearn/**/*.html
13 | 
14 | dist/
15 | MANIFEST
16 | doc/_build/
17 | doc/auto_examples/
18 | doc/modules/generated/
19 | doc/datasets/generated/
20 | doc/min_dependency_table.rst
21 | doc/min_dependency_substitutions.rst
22 | *.pdf
23 | pip-log.txt
24 | scikit_learn.egg-info/
25 | .coverage
26 | coverage
27 | *.py,cover
28 | .tags*
29 | tags
30 | covtype.data.gz
31 | 20news-18828/
32 | 20news-18828.tar.gz
33 | coverages.zip
34 | samples.zip
35 | doc/coverages.zip
36 | doc/samples.zip
37 | coverages
38 | samples
39 | doc/coverages
40 | doc/samples
41 | *.prof
42 | .tox/
43 | .coverage
44 | pip-wheel-metadata
45 | 
46 | lfw_preprocessed/
47 | nips2010_pdf/
48 | 
49 | *.nt.bz2
50 | *.tar.gz
51 | *.tgz
52 | 
53 | examples/cluster/joblib
54 | reuters/
55 | benchmarks/bench_covertype_data/
56 | 
57 | *.prefs
58 | .pydevproject
59 | .idea
60 | .vscode
61 | 
62 | *.c
63 | *.cpp
64 | 
65 | !/**/src/**/*.c
66 | !/**/src/**/*.cpp
67 | *.sln
68 | *.pyproj
69 | 
70 | # Used by py.test
71 | .cache
72 | .pytest_cache/
73 | _configtest.o.d
74 | 
75 | # Used by mypy
76 | .mypy_cache/
77 | 
78 | # files generated from a template
79 | sklearn/utils/_seq_dataset.pyx
80 | sklearn/utils/_seq_dataset.pxd
81 | sklearn/utils/_weight_vector.pyx
82 | sklearn/utils/_weight_vector.pxd
83 | sklearn/linear_model/_sag_fast.pyx
84 | 


--------------------------------------------------------------------------------
/sklearn/ensemble/_hist_gradient_boosting/common.pyx:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | # Y_DTYPE is the dtype to which the targets y are converted. This is also the
 4 | # dtype for leaf values, gains, and sums of gradients / hessians. The gradients
 5 | # and hessians arrays are stored as floats to avoid using too much memory.
 6 | Y_DTYPE = np.float64
 7 | X_DTYPE = np.float64
 8 | X_BINNED_DTYPE = np.uint8  # hence max_bins == 256
 9 | # dtype for gradients and hessians arrays
10 | G_H_DTYPE = np.float32
11 | X_BITSET_INNER_DTYPE = np.uint32
12 | 
13 | HISTOGRAM_DTYPE = np.dtype([
14 |     ('sum_gradients', Y_DTYPE),  # sum of sample gradients in bin
15 |     ('sum_hessians', Y_DTYPE),  # sum of sample hessians in bin
16 |     ('count', np.uint32),  # number of samples in bin
17 | ])
18 | 
19 | PREDICTOR_RECORD_DTYPE = np.dtype([
20 |     ('value', Y_DTYPE),
21 |     ('count', np.uint32),
22 |     ('feature_idx', np.uint32),
23 |     ('num_threshold', X_DTYPE),
24 |     ('missing_go_to_left', np.uint8),
25 |     ('left', np.uint32),
26 |     ('right', np.uint32),
27 |     ('gain', Y_DTYPE),
28 |     ('depth', np.uint32),
29 |     ('is_leaf', np.uint8),
30 |     ('bin_threshold', X_BINNED_DTYPE),
31 |     ('is_categorical', np.uint8),
32 |     # The index of the corresponding bitsets in the Predictor's bitset arrays.
33 |     # Only used if is_categorical is True
34 |     ('bitset_idx', np.uint32)
35 | ])
36 | 
37 | ALMOST_INF = 1e300  # see LightGBM AvoidInf()
38 | 


--------------------------------------------------------------------------------
/doc/modules/isotonic.rst:
--------------------------------------------------------------------------------
 1 | .. _isotonic:
 2 | 
 3 | ===================
 4 | Isotonic regression
 5 | ===================
 6 | 
 7 | .. currentmodule:: sklearn.isotonic
 8 | 
 9 | The class :class:`IsotonicRegression` fits a non-decreasing real function to
10 | 1-dimensional data. It solves the following problem:
11 | 
12 |   minimize :math:`\sum_i w_i (y_i - \hat{y}_i)^2`
13 | 
14 |   subject to :math:`\hat{y}_i \le \hat{y}_j` whenever :math:`X_i \le X_j`,
15 | 
16 | where the weights :math:`w_i` are strictly positive, and both `X` and `y` are
17 | arbitrary real quantities.
18 | 
19 | Setting the `increasing` parameter to `False` changes the constraint to
20 | :math:`\hat{y}_i \ge \hat{y}_j` whenever :math:`X_i \le X_j`. Setting it to
21 | 'auto' will automatically choose the constraint based on `Spearman's rank
22 | correlation coefficient
23 | <https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_.
24 | 
25 | :class:`IsotonicRegression` produces a series of predictions
26 | :math:`\hat{y}_i` for the training data which are the closest to the targets
27 | :math:`y` in terms of mean squared error. These predictions are linearly
28 | interpolated to predict on unseen data. The predictions of
29 | :class:`IsotonicRegression` thus form a function that is piecewise linear:
30 | 
31 | .. figure:: ../auto_examples/miscellaneous/images/sphx_glr_plot_isotonic_regression_001.png
32 |    :target: ../auto_examples/miscellaneous/plot_isotonic_regression.html
33 |    :align: center
34 | 


--------------------------------------------------------------------------------
/examples/linear_model/plot_sgd_loss_functions.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ==========================
 3 | SGD: convex loss functions
 4 | ==========================
 5 | 
 6 | A plot that compares the various convex loss functions supported by
 7 | :class:`~sklearn.linear_model.SGDClassifier`.
 8 | 
 9 | """
10 | 
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | 
14 | 
15 | def modified_huber_loss(y_true, y_pred):
16 |     z = y_pred * y_true  # margin; has to be an array for the masks below
17 |     loss = -4 * z  # linear branch, kept only where z < -1
18 |     loss[z >= -1] = (1 - z[z >= -1]) ** 2  # quadratic branch for z >= -1
19 |     loss[z >= 1.0] = 0  # applied last: overrides the quadratic branch for z >= 1
20 |     return loss
21 | 
22 | 
23 | xmin, xmax = -4, 4
24 | xx = np.linspace(xmin, xmax, 100)
25 | lw = 2
26 | plt.plot([xmin, 0, 0, xmax], [1, 1, 0, 0], color="gold", lw=lw, label="Zero-one loss")
27 | plt.plot(xx, np.where(xx < 1, 1 - xx, 0), color="teal", lw=lw, label="Hinge loss")
28 | plt.plot(xx, -np.minimum(xx, 0), color="yellowgreen", lw=lw, label="Perceptron loss")
29 | plt.plot(xx, np.log2(1 + np.exp(-xx)), color="cornflowerblue", lw=lw, label="Log loss")
30 | plt.plot(
31 |     xx,
32 |     np.where(xx < 1, 1 - xx, 0) ** 2,
33 |     color="orange",
34 |     lw=lw,
35 |     label="Squared hinge loss",
36 | )
37 | plt.plot(
38 |     xx,
39 |     modified_huber_loss(xx, 1),
40 |     color="darkorchid",
41 |     lw=lw,
42 |     linestyle="--",
43 |     label="Modified Huber loss",
44 | )
45 | plt.ylim((0, 8))
46 | plt.legend(loc="upper right")
47 | plt.xlabel(r"Decision function $f(x)$")
48 | plt.ylabel("$L(y=1, f(x))$")
49 | plt.show()
50 | 


--------------------------------------------------------------------------------
/sklearn/linear_model/_glm/tests/test_link.py:
--------------------------------------------------------------------------------
 1 | # Authors: Christian Lorentzen <lorentzen.ch@gmail.com>
 2 | #
 3 | # License: BSD 3 clause
 4 | import numpy as np
 5 | from numpy.testing import assert_allclose
 6 | import pytest
 7 | from scipy.optimize import check_grad
 8 | 
 9 | from sklearn.linear_model._glm.link import (
10 |     IdentityLink,
11 |     LogLink,
12 |     LogitLink,
13 | )
14 | 
15 | 
16 | LINK_FUNCTIONS = [IdentityLink, LogLink, LogitLink]
17 | 
18 | 
19 | @pytest.mark.parametrize("Link", LINK_FUNCTIONS)
20 | def test_link_properties(Link):
21 |     """Test link inverse and derivative."""
22 |     rng = np.random.RandomState(42)
23 |     x = rng.rand(100) * 100
24 |     link = Link()
25 |     if isinstance(link, LogitLink):
26 |         # careful for large x, note expit(36) = 1
27 |         # limit max eta to 15
28 |         x = x / 100 * 15
29 |     assert_allclose(link(link.inverse(x)), x)
30 |     # if g(h(x)) = x, then g'(h(x)) = 1/h'(x)
31 |     # g = link, h = link.inverse
32 |     assert_allclose(link.derivative(link.inverse(x)), 1 / link.inverse_derivative(x))
33 | 
34 | 
35 | @pytest.mark.parametrize("Link", LINK_FUNCTIONS)
36 | def test_link_derivative(Link):
37 |     link = Link()
38 |     x = np.random.RandomState(0).rand(1)
39 |     err = check_grad(link, link.derivative, x) / link.derivative(x)
40 |     assert abs(err) < 1e-6
41 | 
42 |     err = check_grad(link.inverse, link.inverse_derivative, x) / link.derivative(x)
43 |     assert abs(err) < 1e-6
44 | 


--------------------------------------------------------------------------------
/sklearn/decomposition/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`sklearn.decomposition` module includes matrix decomposition
 3 | algorithms, including among others PCA, NMF or ICA. Most of the algorithms of
 4 | this module can be regarded as dimensionality reduction techniques.
 5 | """
 6 | 
 7 | 
 8 | from ._nmf import NMF, non_negative_factorization
 9 | from ._pca import PCA
10 | from ._incremental_pca import IncrementalPCA
11 | from ._kernel_pca import KernelPCA
12 | from ._sparse_pca import SparsePCA, MiniBatchSparsePCA
13 | from ._truncated_svd import TruncatedSVD
14 | from ._fastica import FastICA, fastica
15 | from ._dict_learning import (
16 |     dict_learning,
17 |     dict_learning_online,
18 |     sparse_encode,
19 |     DictionaryLearning,
20 |     MiniBatchDictionaryLearning,
21 |     SparseCoder,
22 | )
23 | from ._factor_analysis import FactorAnalysis
24 | from ..utils.extmath import randomized_svd
25 | from ._lda import LatentDirichletAllocation
26 | 
27 | 
28 | __all__ = [  # public API of the package; every name is imported above
29 |     "DictionaryLearning",
30 |     "FastICA",
31 |     "IncrementalPCA",
32 |     "KernelPCA",
33 |     "MiniBatchDictionaryLearning",
34 |     "MiniBatchSparsePCA",
35 |     "NMF",
36 |     "PCA",
37 |     "SparseCoder",
38 |     "SparsePCA",
39 |     "dict_learning",
40 |     "dict_learning_online",
41 |     "fastica",
42 |     "non_negative_factorization",
43 |     "randomized_svd",
44 |     "sparse_encode",
45 |     "FactorAnalysis",
46 |     "TruncatedSVD",
47 |     "LatentDirichletAllocation",
48 | ]
49 | 


--------------------------------------------------------------------------------
/benchmarks/bench_plot_ward.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Benchmark scikit-learn's Ward implementation compared to SciPy's
 3 | """
 4 | 
 5 | import time
 6 | 
 7 | import numpy as np
 8 | from scipy.cluster import hierarchy
 9 | import matplotlib.pyplot as plt
10 | 
11 | from sklearn.cluster import AgglomerativeClustering
12 | 
13 | ward = AgglomerativeClustering(n_clusters=3, linkage="ward")
14 | 
15 | n_samples = np.logspace(0.5, 3, 9)
16 | n_features = np.logspace(1, 3.5, 7)
17 | N_samples, N_features = np.meshgrid(n_samples, n_features)
18 | scikits_time = np.zeros(N_samples.shape)
19 | scipy_time = np.zeros(N_samples.shape)
20 | 
21 | for i, n in enumerate(n_samples):
22 |     for j, p in enumerate(n_features):
23 |         X = np.random.normal(size=(n, p))
24 |         t0 = time.time()
25 |         ward.fit(X)
26 |         scikits_time[j, i] = time.time() - t0
27 |         t0 = time.time()
28 |         hierarchy.ward(X)
29 |         scipy_time[j, i] = time.time() - t0
30 | 
31 | ratio = scikits_time / scipy_time
32 | 
33 | plt.figure("scikit-learn Ward's method benchmark results")
34 | plt.imshow(np.log(ratio), aspect="auto", origin="lower")
35 | plt.colorbar()
36 | plt.contour(
37 |     ratio,
38 |     levels=[
39 |         1,
40 |     ],
41 |     colors="k",
42 | )
43 | plt.yticks(range(len(n_features)), n_features.astype(int))
44 | plt.ylabel("N features")
45 | plt.xticks(range(len(n_samples)), n_samples.astype(int))
46 | plt.xlabel("N samples")
47 | plt.title("Scikit's time, in units of scipy time (log)")
48 | plt.show()
49 | 


--------------------------------------------------------------------------------
/sklearn/ensemble/_hist_gradient_boosting/common.pxd:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | cimport numpy as np
 3 | 
 4 | np.import_array()
 5 | 
 6 | 
 7 | ctypedef np.npy_float64 X_DTYPE_C
 8 | ctypedef np.npy_uint8 X_BINNED_DTYPE_C
 9 | ctypedef np.npy_float64 Y_DTYPE_C
10 | ctypedef np.npy_float32 G_H_DTYPE_C
11 | ctypedef np.npy_uint32 BITSET_INNER_DTYPE_C
12 | ctypedef BITSET_INNER_DTYPE_C[8] BITSET_DTYPE_C  # 8 x 32-bit words = 256 bits
13 | 
14 | cdef packed struct hist_struct:
15 |     # Same as histogram dtype but we need a struct to declare views. It needs
16 |     # to be packed since by default numpy dtypes aren't aligned
17 |     Y_DTYPE_C sum_gradients
18 |     Y_DTYPE_C sum_hessians
19 |     unsigned int count
20 | 
21 | 
22 | cdef packed struct node_struct:
23 |     # Equivalent struct to PREDICTOR_RECORD_DTYPE to use in memory views. It
24 |     # needs to be packed since by default numpy dtypes aren't aligned
25 |     Y_DTYPE_C value
26 |     unsigned int count
27 |     unsigned int feature_idx
28 |     X_DTYPE_C num_threshold
29 |     unsigned char missing_go_to_left
30 |     unsigned int left
31 |     unsigned int right
32 |     Y_DTYPE_C gain
33 |     unsigned int depth
34 |     unsigned char is_leaf
35 |     X_BINNED_DTYPE_C bin_threshold
36 |     unsigned char is_categorical
37 |     # The index of the corresponding bitsets in the Predictor's bitset arrays.
38 |     # Only used if is_categorical is True
39 |     unsigned int bitset_idx
40 | # NOTE(review): presumably no / increasing / decreasing constraint -- confirm
41 | cpdef enum MonotonicConstraint:
42 |     NO_CST = 0
43 |     POS = 1
44 |     NEG = -1
45 | 


--------------------------------------------------------------------------------