├── .binder
├── postBuild
└── requirements.txt
├── .circleci
├── artifact_path
└── config.yml
├── .codecov.yml
├── .coveragerc
├── .git-blame-ignore-revs
├── .gitattributes
├── .github
├── FUNDING.yml
├── ISSUE_TEMPLATE
│ ├── bug_report.yml
│ ├── config.yml
│ ├── doc_improvement.yml
│ └── feature_request.yml
├── PULL_REQUEST_TEMPLATE.md
├── labeler-file-extensions.yml
├── labeler-module.yml
├── scripts
│ └── label_title_regex.py
└── workflows
│ ├── assign.yml
│ ├── check-changelog.yml
│ ├── check-manifest.yml
│ ├── label-blank-issue.yml
│ ├── labeler-module.yml
│ ├── labeler-title-regex.yml
│ ├── publish_pypi.yml
│ ├── twitter.yml
│ ├── unassign.yml
│ ├── update_tracking_issue.yml
│ └── wheels.yml
├── .gitignore
├── .mailmap
├── .pre-commit-config.yaml
├── .travis.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── COPYING
├── MANIFEST.in
├── Makefile
├── README.rst
├── SECURITY.md
├── asv_benchmarks
├── .gitignore
├── asv.conf.json
└── benchmarks
│ ├── __init__.py
│ ├── cluster.py
│ ├── common.py
│ ├── config.json
│ ├── datasets.py
│ ├── decomposition.py
│ ├── ensemble.py
│ ├── linear_model.py
│ ├── manifold.py
│ ├── metrics.py
│ ├── model_selection.py
│ ├── neighbors.py
│ ├── svm.py
│ └── utils.py
├── azure-pipelines.yml
├── benchmarks
├── .gitignore
├── bench_20newsgroups.py
├── bench_covertype.py
├── bench_feature_expansions.py
├── bench_glm.py
├── bench_glmnet.py
├── bench_hist_gradient_boosting.py
├── bench_hist_gradient_boosting_adult.py
├── bench_hist_gradient_boosting_categorical_only.py
├── bench_hist_gradient_boosting_higgsboson.py
├── bench_hist_gradient_boosting_threading.py
├── bench_isolation_forest.py
├── bench_isotonic.py
├── bench_kernel_pca_solvers_time_vs_n_components.py
├── bench_kernel_pca_solvers_time_vs_n_samples.py
├── bench_lasso.py
├── bench_lof.py
├── bench_mnist.py
├── bench_multilabel_metrics.py
├── bench_online_ocsvm.py
├── bench_plot_fastkmeans.py
├── bench_plot_hierarchical.py
├── bench_plot_incremental_pca.py
├── bench_plot_lasso_path.py
├── bench_plot_neighbors.py
├── bench_plot_nmf.py
├── bench_plot_omp_lars.py
├── bench_plot_parallel_pairwise.py
├── bench_plot_polynomial_kernel_approximation.py
├── bench_plot_randomized_svd.py
├── bench_plot_svd.py
├── bench_plot_ward.py
├── bench_random_projections.py
├── bench_rcv1_logreg_convergence.py
├── bench_saga.py
├── bench_sample_without_replacement.py
├── bench_sgd_regression.py
├── bench_sparsify.py
├── bench_text_vectorizers.py
├── bench_tree.py
├── bench_tsne_mnist.py
└── plot_tsne_mnist.py
├── build_tools
├── Makefile
├── azure
│ ├── install.sh
│ ├── install_win.sh
│ ├── posix-docker.yml
│ ├── posix.yml
│ ├── test_docs.sh
│ ├── test_pytest_soft_dependency.sh
│ ├── test_script.sh
│ ├── upload_codecov.sh
│ └── windows.yml
├── circle
│ ├── build_doc.sh
│ ├── build_test_arm.sh
│ ├── build_test_pypy.sh
│ ├── checkout_merge_commit.sh
│ ├── linting.sh
│ ├── list_versions.py
│ └── push_doc.sh
├── codespell_ignore_words.txt
├── generate_authors_table.py
├── github
│ ├── Windows
│ ├── build_minimal_windows_image.sh
│ ├── build_source.sh
│ ├── build_wheels.sh
│ ├── check_build_trigger.sh
│ ├── check_wheels.py
│ ├── repair_windows_wheels.sh
│ ├── test_source.sh
│ ├── test_wheels.sh
│ ├── test_windows_wheels.sh
│ ├── upload_anaconda.sh
│ └── vendor.py
├── shared.sh
└── travis
│ ├── after_success.sh
│ ├── install.sh
│ ├── install_main.sh
│ ├── install_wheels.sh
│ ├── script.sh
│ ├── test_docs.sh
│ ├── test_script.sh
│ └── test_wheels.sh
├── conftest.py
├── doc
├── Makefile
├── README.md
├── about.rst
├── authors.rst
├── authors_emeritus.rst
├── binder
│ └── requirements.txt
├── common_pitfalls.rst
├── communication_team.rst
├── computing.rst
├── computing
│ ├── computational_performance.rst
│ ├── parallelism.rst
│ └── scaling_strategies.rst
├── conf.py
├── conftest.py
├── contents.rst
├── data_transforms.rst
├── datasets.rst
├── datasets
│ ├── loading_other_datasets.rst
│ ├── real_world.rst
│ ├── sample_generators.rst
│ └── toy_dataset.rst
├── developers
│ ├── advanced_installation.rst
│ ├── bug_triaging.rst
│ ├── contributing.rst
│ ├── develop.rst
│ ├── index.rst
│ ├── maintainer.rst
│ ├── minimal_reproducer.rst
│ ├── performance.rst
│ ├── plotting.rst
│ ├── tips.rst
│ └── utilities.rst
├── faq.rst
├── getting_started.rst
├── glossary.rst
├── governance.rst
├── images
│ ├── axa-small.png
│ ├── axa.png
│ ├── bcg-small.png
│ ├── bcg.png
│ ├── bnp-small.png
│ ├── bnp.png
│ ├── cds-logo.png
│ ├── columbia-small.png
│ ├── columbia.png
│ ├── czi_logo.svg
│ ├── dataiku-small.png
│ ├── dataiku.png
│ ├── digicosme.png
│ ├── dysco.png
│ ├── fnrs-logo-small.png
│ ├── fujitsu-small.png
│ ├── fujitsu.png
│ ├── google-small.png
│ ├── grid_search_cross_validation.png
│ ├── grid_search_workflow.png
│ ├── huggingface_logo-noborder.png
│ ├── inria-logo.jpg
│ ├── inria-small.png
│ ├── intel-small.png
│ ├── intel.png
│ ├── iris.pdf
│ ├── iris.svg
│ ├── last_digit.png
│ ├── lda_model_graph.png
│ ├── logo_APHP.png
│ ├── logo_APHP_text.png
│ ├── microsoft-small.png
│ ├── microsoft.png
│ ├── ml_map.png
│ ├── multi_org_chart.png
│ ├── multilayerperceptron_network.png
│ ├── no_image.png
│ ├── nvidia-small.png
│ ├── nvidia.png
│ ├── nyu_short_color.png
│ ├── plot_digits_classification.png
│ ├── plot_face_recognition_1.png
│ ├── plot_face_recognition_2.png
│ ├── png-logo-inria-la-fondation.png
│ ├── quansight-labs-small.png
│ ├── quansight-labs.png
│ ├── rbm_graph.png
│ ├── scikit-learn-logo-notext.png
│ ├── scikit-learn-logo-small.png
│ ├── sloan_banner.png
│ ├── sloan_logo-small.png
│ ├── sydney-primary.jpeg
│ ├── sydney-stacked-small.png
│ ├── telecom-small.png
│ ├── telecom.png
│ └── visual-studio-build-tools-selection.png
├── includes
│ ├── big_toc_css.rst
│ └── bigger_toc_css.rst
├── inspection.rst
├── install.rst
├── logos
│ ├── favicon.ico
│ ├── identity.pdf
│ ├── scikit-learn-logo-notext.png
│ ├── scikit-learn-logo-small.png
│ ├── scikit-learn-logo-thumb.png
│ ├── scikit-learn-logo.bmp
│ ├── scikit-learn-logo.png
│ └── scikit-learn-logo.svg
├── make.bat
├── model_persistence.rst
├── model_selection.rst
├── modules
│ ├── biclustering.rst
│ ├── calibration.rst
│ ├── classes.rst
│ ├── clustering.rst
│ ├── compose.rst
│ ├── covariance.rst
│ ├── cross_decomposition.rst
│ ├── cross_validation.rst
│ ├── decomposition.rst
│ ├── density.rst
│ ├── ensemble.rst
│ ├── feature_extraction.rst
│ ├── feature_selection.rst
│ ├── gaussian_process.rst
│ ├── glm_data
│ │ ├── lasso_enet_coordinate_descent.png
│ │ └── poisson_gamma_tweedie_distributions.png
│ ├── grid_search.rst
│ ├── impute.rst
│ ├── isotonic.rst
│ ├── kernel_approximation.rst
│ ├── kernel_ridge.rst
│ ├── lda_qda.rst
│ ├── learning_curve.rst
│ ├── linear_model.rst
│ ├── manifold.rst
│ ├── metrics.rst
│ ├── mixture.rst
│ ├── model_evaluation.rst
│ ├── multiclass.rst
│ ├── naive_bayes.rst
│ ├── neighbors.rst
│ ├── neural_networks_supervised.rst
│ ├── neural_networks_unsupervised.rst
│ ├── outlier_detection.rst
│ ├── partial_dependence.rst
│ ├── permutation_importance.rst
│ ├── pipeline.rst
│ ├── preprocessing.rst
│ ├── preprocessing_targets.rst
│ ├── random_projection.rst
│ ├── semi_supervised.rst
│ ├── sgd.rst
│ ├── svm.rst
│ ├── tree.rst
│ └── unsupervised_reduction.rst
├── preface.rst
├── presentations.rst
├── related_projects.rst
├── roadmap.rst
├── sphinxext
│ ├── MANIFEST.in
│ ├── add_toctree_functions.py
│ ├── custom_references_resolver.py
│ ├── doi_role.py
│ ├── github_link.py
│ └── sphinx_issues.py
├── supervised_learning.rst
├── support.rst
├── templates
│ ├── class.rst
│ ├── class_with_call.rst
│ ├── deprecated_class.rst
│ ├── deprecated_class_with_call.rst
│ ├── deprecated_class_without_init.rst
│ ├── deprecated_function.rst
│ ├── function.rst
│ ├── generate_deprecated.sh
│ ├── index.html
│ ├── numpydoc_docstring.rst
│ └── redirects.html
├── testimonials
│ ├── README.txt
│ ├── images
│ │ ├── Makefile
│ │ ├── aweber.png
│ │ ├── bestofmedia-logo.png
│ │ ├── betaworks.png
│ │ ├── birchbox.jpg
│ │ ├── bnp_paribas_cardif.png
│ │ ├── booking.png
│ │ ├── change-logo.png
│ │ ├── dataiku_logo.png
│ │ ├── datapublica.png
│ │ ├── datarobot.png
│ │ ├── evernote.png
│ │ ├── howaboutwe.png
│ │ ├── huggingface.png
│ │ ├── infonea.jpg
│ │ ├── inria.png
│ │ ├── jpmorgan.png
│ │ ├── lovely.png
│ │ ├── machinalis.png
│ │ ├── mars.png
│ │ ├── okcupid.png
│ │ ├── ottogroup_logo.png
│ │ ├── peerindex.png
│ │ ├── phimeca.png
│ │ ├── rangespan.png
│ │ ├── solido_logo.png
│ │ ├── spotify.png
│ │ ├── telecomparistech.jpg
│ │ ├── yhat.png
│ │ └── zopa.png
│ └── testimonials.rst
├── themes
│ └── scikit-learn-modern
│ │ ├── javascript.html
│ │ ├── layout.html
│ │ ├── nav.html
│ │ ├── search.html
│ │ ├── static
│ │ ├── css
│ │ │ ├── theme.css
│ │ │ └── vendor
│ │ │ │ └── bootstrap.min.css
│ │ └── js
│ │ │ ├── searchtools.js
│ │ │ └── vendor
│ │ │ └── bootstrap.min.js
│ │ └── theme.conf
├── triage_team.rst
├── tune_toc.rst
├── tutorial
│ ├── basic
│ │ └── tutorial.rst
│ ├── common_includes
│ │ └── info.txt
│ ├── index.rst
│ ├── machine_learning_map
│ │ ├── ML_MAPS_README.txt
│ │ ├── index.rst
│ │ ├── parse_path.py
│ │ ├── pyparsing.py
│ │ └── svg2imagemap.py
│ ├── statistical_inference
│ │ ├── index.rst
│ │ ├── model_selection.rst
│ │ ├── putting_together.rst
│ │ ├── settings.rst
│ │ ├── supervised_learning.rst
│ │ └── unsupervised_learning.rst
│ └── text_analytics
│ │ ├── .gitignore
│ │ ├── data
│ │ ├── languages
│ │ │ └── fetch_data.py
│ │ ├── movie_reviews
│ │ │ └── fetch_data.py
│ │ └── twenty_newsgroups
│ │ │ └── fetch_data.py
│ │ ├── skeletons
│ │ ├── exercise_01_language_train_model.py
│ │ └── exercise_02_sentiment.py
│ │ ├── solutions
│ │ ├── exercise_01_language_train_model.py
│ │ ├── exercise_02_sentiment.py
│ │ └── generate_skeletons.py
│ │ └── working_with_text_data.rst
├── unsupervised_learning.rst
├── user_guide.rst
├── visualizations.rst
├── whats_new.rst
└── whats_new
│ ├── _contributors.rst
│ ├── changelog_legend.inc
│ ├── older_versions.rst
│ ├── v0.13.rst
│ ├── v0.14.rst
│ ├── v0.15.rst
│ ├── v0.16.rst
│ ├── v0.17.rst
│ ├── v0.18.rst
│ ├── v0.19.rst
│ ├── v0.20.rst
│ ├── v0.21.rst
│ ├── v0.22.rst
│ ├── v0.23.rst
│ ├── v0.24.rst
│ ├── v1.0.rst
│ └── v1.1.rst
├── examples
├── README.txt
├── applications
│ ├── README.txt
│ ├── plot_cyclical_feature_engineering.py
│ ├── plot_digits_denoising.py
│ ├── plot_face_recognition.py
│ ├── plot_model_complexity_influence.py
│ ├── plot_out_of_core_classification.py
│ ├── plot_outlier_detection_wine.py
│ ├── plot_prediction_latency.py
│ ├── plot_species_distribution_modeling.py
│ ├── plot_stock_market.py
│ ├── plot_tomography_l1_reconstruction.py
│ ├── plot_topics_extraction_with_nmf_lda.py
│ ├── svm_gui.py
│ └── wikipedia_principal_eigenvector.py
├── bicluster
│ ├── README.txt
│ ├── plot_bicluster_newsgroups.py
│ ├── plot_spectral_biclustering.py
│ └── plot_spectral_coclustering.py
├── calibration
│ ├── README.txt
│ ├── plot_calibration.py
│ ├── plot_calibration_curve.py
│ ├── plot_calibration_multiclass.py
│ └── plot_compare_calibration.py
├── classification
│ ├── README.txt
│ ├── plot_classification_probability.py
│ ├── plot_classifier_comparison.py
│ ├── plot_digits_classification.py
│ ├── plot_lda.py
│ └── plot_lda_qda.py
├── cluster
│ ├── README.txt
│ ├── plot_adjusted_for_chance_measures.py
│ ├── plot_affinity_propagation.py
│ ├── plot_agglomerative_clustering.py
│ ├── plot_agglomerative_clustering_metrics.py
│ ├── plot_agglomerative_dendrogram.py
│ ├── plot_birch_vs_minibatchkmeans.py
│ ├── plot_cluster_comparison.py
│ ├── plot_cluster_iris.py
│ ├── plot_coin_segmentation.py
│ ├── plot_coin_ward_segmentation.py
│ ├── plot_color_quantization.py
│ ├── plot_dbscan.py
│ ├── plot_dict_face_patches.py
│ ├── plot_digits_agglomeration.py
│ ├── plot_digits_linkage.py
│ ├── plot_face_compress.py
│ ├── plot_feature_agglomeration_vs_univariate_selection.py
│ ├── plot_inductive_clustering.py
│ ├── plot_kmeans_assumptions.py
│ ├── plot_kmeans_digits.py
│ ├── plot_kmeans_plusplus.py
│ ├── plot_kmeans_silhouette_analysis.py
│ ├── plot_kmeans_stability_low_dim_dense.py
│ ├── plot_linkage_comparison.py
│ ├── plot_mean_shift.py
│ ├── plot_mini_batch_kmeans.py
│ ├── plot_optics.py
│ ├── plot_segmentation_toy.py
│ └── plot_ward_structured_vs_unstructured.py
├── compose
│ ├── README.txt
│ ├── plot_column_transformer.py
│ ├── plot_column_transformer_mixed_types.py
│ ├── plot_compare_reduction.py
│ ├── plot_digits_pipe.py
│ ├── plot_feature_union.py
│ └── plot_transformed_target.py
├── covariance
│ ├── README.txt
│ ├── plot_covariance_estimation.py
│ ├── plot_lw_vs_oas.py
│ ├── plot_mahalanobis_distances.py
│ ├── plot_robust_vs_empirical_covariance.py
│ └── plot_sparse_cov.py
├── cross_decomposition
│ ├── README.txt
│ ├── plot_compare_cross_decomposition.py
│ └── plot_pcr_vs_pls.py
├── datasets
│ ├── README.txt
│ ├── plot_digits_last_image.py
│ ├── plot_iris_dataset.py
│ ├── plot_random_dataset.py
│ └── plot_random_multilabel_dataset.py
├── decomposition
│ ├── README.txt
│ ├── plot_beta_divergence.py
│ ├── plot_faces_decomposition.py
│ ├── plot_ica_blind_source_separation.py
│ ├── plot_ica_vs_pca.py
│ ├── plot_image_denoising.py
│ ├── plot_incremental_pca.py
│ ├── plot_kernel_pca.py
│ ├── plot_pca_3d.py
│ ├── plot_pca_iris.py
│ ├── plot_pca_vs_fa_model_selection.py
│ ├── plot_pca_vs_lda.py
│ ├── plot_sparse_coding.py
│ └── plot_varimax_fa.py
├── ensemble
│ ├── README.txt
│ ├── plot_adaboost_hastie_10_2.py
│ ├── plot_adaboost_multiclass.py
│ ├── plot_adaboost_regression.py
│ ├── plot_adaboost_twoclass.py
│ ├── plot_bias_variance.py
│ ├── plot_ensemble_oob.py
│ ├── plot_feature_transformation.py
│ ├── plot_forest_importances.py
│ ├── plot_forest_importances_faces.py
│ ├── plot_forest_iris.py
│ ├── plot_gradient_boosting_categorical.py
│ ├── plot_gradient_boosting_early_stopping.py
│ ├── plot_gradient_boosting_oob.py
│ ├── plot_gradient_boosting_quantile.py
│ ├── plot_gradient_boosting_regression.py
│ ├── plot_gradient_boosting_regularization.py
│ ├── plot_isolation_forest.py
│ ├── plot_monotonic_constraints.py
│ ├── plot_random_forest_embedding.py
│ ├── plot_random_forest_regression_multioutput.py
│ ├── plot_stack_predictors.py
│ ├── plot_voting_decision_regions.py
│ ├── plot_voting_probas.py
│ └── plot_voting_regressor.py
├── exercises
│ ├── README.txt
│ ├── plot_cv_diabetes.py
│ ├── plot_cv_digits.py
│ ├── plot_digits_classification_exercise.py
│ └── plot_iris_exercise.py
├── feature_selection
│ ├── README.txt
│ ├── plot_f_test_vs_mi.py
│ ├── plot_feature_selection.py
│ ├── plot_feature_selection_pipeline.py
│ ├── plot_rfe_digits.py
│ ├── plot_rfe_with_cross_validation.py
│ └── plot_select_from_model_diabetes.py
├── gaussian_process
│ ├── README.txt
│ ├── plot_compare_gpr_krr.py
│ ├── plot_gpc.py
│ ├── plot_gpc_iris.py
│ ├── plot_gpc_isoprobability.py
│ ├── plot_gpc_xor.py
│ ├── plot_gpr_co2.py
│ ├── plot_gpr_noisy.py
│ ├── plot_gpr_noisy_targets.py
│ ├── plot_gpr_on_structured_data.py
│ └── plot_gpr_prior_posterior.py
├── impute
│ ├── README.txt
│ ├── plot_iterative_imputer_variants_comparison.py
│ └── plot_missing_values.py
├── inspection
│ ├── README.txt
│ ├── plot_linear_model_coefficient_interpretation.py
│ ├── plot_partial_dependence.py
│ ├── plot_permutation_importance.py
│ └── plot_permutation_importance_multicollinear.py
├── kernel_approximation
│ ├── README.txt
│ └── plot_scalable_poly_kernels.py
├── linear_model
│ ├── README.txt
│ ├── plot_ard.py
│ ├── plot_bayesian_ridge.py
│ ├── plot_bayesian_ridge_curvefit.py
│ ├── plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
│ ├── plot_huber_vs_ridge.py
│ ├── plot_iris_logistic.py
│ ├── plot_lasso_and_elasticnet.py
│ ├── plot_lasso_coordinate_descent_path.py
│ ├── plot_lasso_dense_vs_sparse_data.py
│ ├── plot_lasso_lars.py
│ ├── plot_lasso_lars_ic.py
│ ├── plot_lasso_model_selection.py
│ ├── plot_logistic.py
│ ├── plot_logistic_l1_l2_sparsity.py
│ ├── plot_logistic_multinomial.py
│ ├── plot_logistic_path.py
│ ├── plot_multi_task_lasso_support.py
│ ├── plot_nnls.py
│ ├── plot_ols.py
│ ├── plot_ols_3d.py
│ ├── plot_ols_ridge_variance.py
│ ├── plot_omp.py
│ ├── plot_poisson_regression_non_normal_loss.py
│ ├── plot_polynomial_interpolation.py
│ ├── plot_quantile_regression.py
│ ├── plot_ransac.py
│ ├── plot_ridge_coeffs.py
│ ├── plot_ridge_path.py
│ ├── plot_robust_fit.py
│ ├── plot_sgd_comparison.py
│ ├── plot_sgd_early_stopping.py
│ ├── plot_sgd_iris.py
│ ├── plot_sgd_loss_functions.py
│ ├── plot_sgd_penalties.py
│ ├── plot_sgd_separating_hyperplane.py
│ ├── plot_sgd_weighted_samples.py
│ ├── plot_sgdocsvm_vs_ocsvm.py
│ ├── plot_sparse_logistic_regression_20newsgroups.py
│ ├── plot_sparse_logistic_regression_mnist.py
│ ├── plot_theilsen.py
│ └── plot_tweedie_regression_insurance_claims.py
├── manifold
│ ├── README.txt
│ ├── plot_compare_methods.py
│ ├── plot_lle_digits.py
│ ├── plot_manifold_sphere.py
│ ├── plot_mds.py
│ ├── plot_swissroll.py
│ └── plot_t_sne_perplexity.py
├── miscellaneous
│ ├── README.txt
│ ├── plot_anomaly_comparison.py
│ ├── plot_changed_only_pprint_parameter.py
│ ├── plot_display_object_visualization.py
│ ├── plot_isotonic_regression.py
│ ├── plot_johnson_lindenstrauss_bound.py
│ ├── plot_kernel_approximation.py
│ ├── plot_kernel_ridge_regression.py
│ ├── plot_multilabel.py
│ ├── plot_multioutput_face_completion.py
│ ├── plot_partial_dependence_visualization_api.py
│ ├── plot_pipeline_display.py
│ └── plot_roc_curve_visualization_api.py
├── mixture
│ ├── README.txt
│ ├── plot_concentration_prior.py
│ ├── plot_gmm.py
│ ├── plot_gmm_covariances.py
│ ├── plot_gmm_pdf.py
│ ├── plot_gmm_selection.py
│ └── plot_gmm_sin.py
├── model_selection
│ ├── README.txt
│ ├── grid_search_text_feature_extraction.py
│ ├── plot_confusion_matrix.py
│ ├── plot_cv_indices.py
│ ├── plot_cv_predict.py
│ ├── plot_det.py
│ ├── plot_grid_search_digits.py
│ ├── plot_grid_search_refit_callable.py
│ ├── plot_grid_search_stats.py
│ ├── plot_learning_curve.py
│ ├── plot_multi_metric_evaluation.py
│ ├── plot_nested_cross_validation_iris.py
│ ├── plot_permutation_tests_for_classification.py
│ ├── plot_precision_recall.py
│ ├── plot_randomized_search.py
│ ├── plot_roc.py
│ ├── plot_roc_crossval.py
│ ├── plot_successive_halving_heatmap.py
│ ├── plot_successive_halving_iterations.py
│ ├── plot_train_error_vs_test_error.py
│ ├── plot_underfitting_overfitting.py
│ └── plot_validation_curve.py
├── multioutput
│ ├── README.txt
│ └── plot_classifier_chain_yeast.py
├── neighbors
│ ├── README.txt
│ ├── approximate_nearest_neighbors.py
│ ├── plot_caching_nearest_neighbors.py
│ ├── plot_classification.py
│ ├── plot_digits_kde_sampling.py
│ ├── plot_kde_1d.py
│ ├── plot_lof_novelty_detection.py
│ ├── plot_lof_outlier_detection.py
│ ├── plot_nca_classification.py
│ ├── plot_nca_dim_reduction.py
│ ├── plot_nca_illustration.py
│ ├── plot_nearest_centroid.py
│ ├── plot_regression.py
│ └── plot_species_kde.py
├── neural_networks
│ ├── README.txt
│ ├── plot_mlp_alpha.py
│ ├── plot_mlp_training_curves.py
│ ├── plot_mnist_filters.py
│ └── plot_rbm_logistic_classification.py
├── preprocessing
│ ├── README.txt
│ ├── plot_all_scaling.py
│ ├── plot_discretization.py
│ ├── plot_discretization_classification.py
│ ├── plot_discretization_strategies.py
│ ├── plot_map_data_to_normal.py
│ └── plot_scaling_importance.py
├── release_highlights
│ ├── README.txt
│ ├── plot_release_highlights_0_22_0.py
│ ├── plot_release_highlights_0_23_0.py
│ ├── plot_release_highlights_0_24_0.py
│ └── plot_release_highlights_1_0_0.py
├── semi_supervised
│ ├── README.txt
│ ├── plot_label_propagation_digits.py
│ ├── plot_label_propagation_digits_active_learning.py
│ ├── plot_label_propagation_structure.py
│ ├── plot_self_training_varying_threshold.py
│ ├── plot_semi_supervised_newsgroups.py
│ └── plot_semi_supervised_versus_svm_iris.py
├── svm
│ ├── README.txt
│ ├── plot_custom_kernel.py
│ ├── plot_iris_svc.py
│ ├── plot_linearsvc_support_vectors.py
│ ├── plot_oneclass.py
│ ├── plot_rbf_parameters.py
│ ├── plot_separating_hyperplane.py
│ ├── plot_separating_hyperplane_unbalanced.py
│ ├── plot_svm_anova.py
│ ├── plot_svm_kernels.py
│ ├── plot_svm_margin.py
│ ├── plot_svm_nonlinear.py
│ ├── plot_svm_regression.py
│ ├── plot_svm_scale_c.py
│ ├── plot_svm_tie_breaking.py
│ └── plot_weighted_samples.py
├── text
│ ├── README.txt
│ ├── plot_document_classification_20newsgroups.py
│ ├── plot_document_clustering.py
│ └── plot_hashing_vs_dict_vectorizer.py
└── tree
│ ├── README.txt
│ ├── plot_cost_complexity_pruning.py
│ ├── plot_iris_dtc.py
│ ├── plot_tree_regression.py
│ ├── plot_tree_regression_multioutput.py
│ └── plot_unveil_tree_structure.py
├── lgtm.yml
├── maint_tools
├── check_pxd_in_installation.py
├── sort_whats_new.py
├── update_tracking_issue.py
└── whats_missing.sh
├── pyproject.toml
├── setup.cfg
├── setup.py
└── sklearn
├── __check_build
├── __init__.py
├── _check_build.pyx
└── setup.py
├── __init__.py
├── _build_utils
├── __init__.py
├── openmp_helpers.py
└── pre_build_helpers.py
├── _config.py
├── _distributor_init.py
├── _isotonic.pyx
├── _loss
├── __init__.py
├── _loss.pxd
├── _loss.pyx.tp
├── glm_distribution.py
├── link.py
├── loss.py
├── setup.py
└── tests
│ ├── __init__.py
│ ├── test_glm_distribution.py
│ ├── test_link.py
│ └── test_loss.py
├── _min_dependencies.py
├── base.py
├── calibration.py
├── cluster
├── __init__.py
├── _affinity_propagation.py
├── _agglomerative.py
├── _bicluster.py
├── _birch.py
├── _dbscan.py
├── _dbscan_inner.pyx
├── _feature_agglomeration.py
├── _hierarchical_fast.pyx
├── _k_means_common.pxd
├── _k_means_common.pyx
├── _k_means_elkan.pyx
├── _k_means_lloyd.pyx
├── _k_means_minibatch.pyx
├── _kmeans.py
├── _mean_shift.py
├── _optics.py
├── _spectral.py
├── setup.py
└── tests
│ ├── __init__.py
│ ├── common.py
│ ├── test_affinity_propagation.py
│ ├── test_bicluster.py
│ ├── test_birch.py
│ ├── test_dbscan.py
│ ├── test_feature_agglomeration.py
│ ├── test_hierarchical.py
│ ├── test_k_means.py
│ ├── test_mean_shift.py
│ ├── test_optics.py
│ └── test_spectral.py
├── compose
├── __init__.py
├── _column_transformer.py
├── _target.py
└── tests
│ ├── __init__.py
│ ├── test_column_transformer.py
│ └── test_target.py
├── conftest.py
├── covariance
├── __init__.py
├── _elliptic_envelope.py
├── _empirical_covariance.py
├── _graph_lasso.py
├── _robust_covariance.py
├── _shrunk_covariance.py
└── tests
│ ├── __init__.py
│ ├── test_covariance.py
│ ├── test_elliptic_envelope.py
│ ├── test_graphical_lasso.py
│ └── test_robust_covariance.py
├── cross_decomposition
├── __init__.py
├── _pls.py
└── tests
│ ├── __init__.py
│ └── test_pls.py
├── datasets
├── __init__.py
├── _arff_parser.py
├── _base.py
├── _california_housing.py
├── _covtype.py
├── _kddcup99.py
├── _lfw.py
├── _olivetti_faces.py
├── _openml.py
├── _rcv1.py
├── _samples_generator.py
├── _species_distributions.py
├── _svmlight_format_fast.pyx
├── _svmlight_format_io.py
├── _twenty_newsgroups.py
├── data
│ ├── __init__.py
│ ├── boston_house_prices.csv
│ ├── breast_cancer.csv
│ ├── diabetes_data_raw.csv.gz
│ ├── diabetes_target.csv.gz
│ ├── digits.csv.gz
│ ├── iris.csv
│ ├── linnerud_exercise.csv
│ ├── linnerud_physiological.csv
│ └── wine_data.csv
├── descr
│ ├── __init__.py
│ ├── boston_house_prices.rst
│ ├── breast_cancer.rst
│ ├── california_housing.rst
│ ├── covtype.rst
│ ├── diabetes.rst
│ ├── digits.rst
│ ├── iris.rst
│ ├── kddcup99.rst
│ ├── lfw.rst
│ ├── linnerud.rst
│ ├── olivetti_faces.rst
│ ├── rcv1.rst
│ ├── twenty_newsgroups.rst
│ └── wine_data.rst
├── images
│ ├── README.txt
│ ├── __init__.py
│ ├── china.jpg
│ └── flower.jpg
├── setup.py
└── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── data
│ ├── __init__.py
│ ├── openml
│ │ ├── __init__.py
│ │ ├── id_1
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-1.json.gz
│ │ │ ├── api-v1-jdf-1.json.gz
│ │ │ ├── api-v1-jdq-1.json.gz
│ │ │ └── data-v1-dl-1.arff.gz
│ │ ├── id_1119
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-1119.json.gz
│ │ │ ├── api-v1-jdf-1119.json.gz
│ │ │ ├── api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz
│ │ │ ├── api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz
│ │ │ ├── api-v1-jdq-1119.json.gz
│ │ │ └── data-v1-dl-54002.arff.gz
│ │ ├── id_2
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-2.json.gz
│ │ │ ├── api-v1-jdf-2.json.gz
│ │ │ ├── api-v1-jdl-dn-anneal-l-2-dv-1.json.gz
│ │ │ ├── api-v1-jdl-dn-anneal-l-2-s-act-.json.gz
│ │ │ ├── api-v1-jdq-2.json.gz
│ │ │ └── data-v1-dl-1666876.arff.gz
│ │ ├── id_292
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-292.json.gz
│ │ │ ├── api-v1-jd-40981.json.gz
│ │ │ ├── api-v1-jdf-292.json.gz
│ │ │ ├── api-v1-jdf-40981.json.gz
│ │ │ ├── api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz
│ │ │ ├── api-v1-jdl-dn-australian-l-2-dv-1.json.gz
│ │ │ ├── api-v1-jdl-dn-australian-l-2-s-act-.json.gz
│ │ │ └── data-v1-dl-49822.arff.gz
│ │ ├── id_3
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-3.json.gz
│ │ │ ├── api-v1-jdf-3.json.gz
│ │ │ ├── api-v1-jdq-3.json.gz
│ │ │ └── data-v1-dl-3.arff.gz
│ │ ├── id_40589
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-40589.json.gz
│ │ │ ├── api-v1-jdf-40589.json.gz
│ │ │ ├── api-v1-jdl-dn-emotions-l-2-dv-3.json.gz
│ │ │ ├── api-v1-jdl-dn-emotions-l-2-s-act-.json.gz
│ │ │ ├── api-v1-jdq-40589.json.gz
│ │ │ └── data-v1-dl-4644182.arff.gz
│ │ ├── id_40675
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-40675.json.gz
│ │ │ ├── api-v1-jdf-40675.json.gz
│ │ │ ├── api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz
│ │ │ ├── api-v1-jdl-dn-glass2-l-2-dv-1.json.gz
│ │ │ ├── api-v1-jdl-dn-glass2-l-2-s-act-.json.gz
│ │ │ ├── api-v1-jdq-40675.json.gz
│ │ │ └── data-v1-dl-4965250.arff.gz
│ │ ├── id_40945
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-40945.json.gz
│ │ │ ├── api-v1-jdf-40945.json.gz
│ │ │ ├── api-v1-jdq-40945.json.gz
│ │ │ └── data-v1-dl-16826755.arff.gz
│ │ ├── id_40966
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-40966.json.gz
│ │ │ ├── api-v1-jdf-40966.json.gz
│ │ │ ├── api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz
│ │ │ ├── api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz
│ │ │ ├── api-v1-jdq-40966.json.gz
│ │ │ └── data-v1-dl-17928620.arff.gz
│ │ ├── id_42585
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-42585.json.gz
│ │ │ ├── api-v1-jdf-42585.json.gz
│ │ │ ├── api-v1-jdq-42585.json.gz
│ │ │ └── data-v1-dl-21854866.arff.gz
│ │ ├── id_561
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-561.json.gz
│ │ │ ├── api-v1-jdf-561.json.gz
│ │ │ ├── api-v1-jdl-dn-cpu-l-2-dv-1.json.gz
│ │ │ ├── api-v1-jdl-dn-cpu-l-2-s-act-.json.gz
│ │ │ ├── api-v1-jdq-561.json.gz
│ │ │ └── data-v1-dl-52739.arff.gz
│ │ ├── id_61
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-61.json.gz
│ │ │ ├── api-v1-jdf-61.json.gz
│ │ │ ├── api-v1-jdl-dn-iris-l-2-dv-1.json.gz
│ │ │ ├── api-v1-jdl-dn-iris-l-2-s-act-.json.gz
│ │ │ ├── api-v1-jdq-61.json.gz
│ │ │ └── data-v1-dl-61.arff.gz
│ │ └── id_62
│ │ │ ├── __init__.py
│ │ │ ├── api-v1-jd-62.json.gz
│ │ │ ├── api-v1-jdf-62.json.gz
│ │ │ ├── api-v1-jdq-62.json.gz
│ │ │ └── data-v1-dl-52352.arff.gz
│ ├── svmlight_classification.txt
│ ├── svmlight_invalid.txt
│ ├── svmlight_invalid_order.txt
│ └── svmlight_multilabel.txt
│ ├── test_20news.py
│ ├── test_base.py
│ ├── test_california_housing.py
│ ├── test_common.py
│ ├── test_covtype.py
│ ├── test_kddcup99.py
│ ├── test_lfw.py
│ ├── test_olivetti_faces.py
│ ├── test_openml.py
│ ├── test_rcv1.py
│ ├── test_samples_generator.py
│ └── test_svmlight_format.py
├── decomposition
├── __init__.py
├── _base.py
├── _cdnmf_fast.pyx
├── _dict_learning.py
├── _factor_analysis.py
├── _fastica.py
├── _incremental_pca.py
├── _kernel_pca.py
├── _lda.py
├── _nmf.py
├── _online_lda_fast.pyx
├── _pca.py
├── _sparse_pca.py
├── _truncated_svd.py
├── setup.py
└── tests
│ ├── __init__.py
│ ├── test_dict_learning.py
│ ├── test_factor_analysis.py
│ ├── test_fastica.py
│ ├── test_incremental_pca.py
│ ├── test_kernel_pca.py
│ ├── test_nmf.py
│ ├── test_online_lda.py
│ ├── test_pca.py
│ ├── test_sparse_pca.py
│ └── test_truncated_svd.py
├── discriminant_analysis.py
├── dummy.py
├── ensemble
├── __init__.py
├── _bagging.py
├── _base.py
├── _forest.py
├── _gb.py
├── _gb_losses.py
├── _gradient_boosting.pyx
├── _hist_gradient_boosting
│ ├── __init__.py
│ ├── _binning.pyx
│ ├── _bitset.pxd
│ ├── _bitset.pyx
│ ├── _gradient_boosting.pyx
│ ├── _predictor.pyx
│ ├── binning.py
│ ├── common.pxd
│ ├── common.pyx
│ ├── gradient_boosting.py
│ ├── grower.py
│ ├── histogram.pyx
│ ├── predictor.py
│ ├── splitting.pyx
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test_binning.py
│ │ ├── test_bitset.py
│ │ ├── test_compare_lightgbm.py
│ │ ├── test_gradient_boosting.py
│ │ ├── test_grower.py
│ │ ├── test_histogram.py
│ │ ├── test_monotonic_contraints.py
│ │ ├── test_predictor.py
│ │ ├── test_splitting.py
│ │ └── test_warm_start.py
│ └── utils.pyx
├── _iforest.py
├── _stacking.py
├── _voting.py
├── _weight_boosting.py
├── setup.py
└── tests
│ ├── __init__.py
│ ├── test_bagging.py
│ ├── test_base.py
│ ├── test_common.py
│ ├── test_forest.py
│ ├── test_gradient_boosting.py
│ ├── test_gradient_boosting_loss_functions.py
│ ├── test_iforest.py
│ ├── test_stacking.py
│ ├── test_voting.py
│ └── test_weight_boosting.py
├── exceptions.py
├── experimental
├── __init__.py
├── enable_halving_search_cv.py
├── enable_hist_gradient_boosting.py
├── enable_iterative_imputer.py
└── tests
│ ├── __init__.py
│ ├── test_enable_hist_gradient_boosting.py
│ ├── test_enable_iterative_imputer.py
│ └── test_enable_successive_halving.py
├── externals
├── README
├── __init__.py
├── _arff.py
├── _lobpcg.py
├── _packaging
│ ├── __init__.py
│ ├── _structures.py
│ └── version.py
├── _pilutil.py
└── conftest.py
├── feature_extraction
├── __init__.py
├── _dict_vectorizer.py
├── _hash.py
├── _hashing_fast.pyx
├── _stop_words.py
├── image.py
├── setup.py
├── tests
│ ├── __init__.py
│ ├── test_dict_vectorizer.py
│ ├── test_feature_hasher.py
│ ├── test_image.py
│ └── test_text.py
└── text.py
├── feature_selection
├── __init__.py
├── _base.py
├── _from_model.py
├── _mutual_info.py
├── _rfe.py
├── _sequential.py
├── _univariate_selection.py
├── _variance_threshold.py
└── tests
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_chi2.py
│ ├── test_feature_select.py
│ ├── test_from_model.py
│ ├── test_mutual_info.py
│ ├── test_rfe.py
│ ├── test_sequential.py
│ └── test_variance_threshold.py
├── gaussian_process
├── __init__.py
├── _gpc.py
├── _gpr.py
├── kernels.py
└── tests
│ ├── __init__.py
│ ├── _mini_sequence_kernel.py
│ ├── test_gpc.py
│ ├── test_gpr.py
│ └── test_kernels.py
├── impute
├── __init__.py
├── _base.py
├── _iterative.py
├── _knn.py
└── tests
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_common.py
│ ├── test_impute.py
│ └── test_knn.py
├── inspection
├── __init__.py
├── _partial_dependence.py
├── _permutation_importance.py
├── _plot
│ ├── __init__.py
│ ├── partial_dependence.py
│ └── tests
│ │ ├── __init__.py
│ │ └── test_plot_partial_dependence.py
├── setup.py
└── tests
│ ├── __init__.py
│ ├── test_partial_dependence.py
│ └── test_permutation_importance.py
├── isotonic.py
├── kernel_approximation.py
├── kernel_ridge.py
├── linear_model
├── __init__.py
├── _base.py
├── _bayes.py
├── _cd_fast.pyx
├── _coordinate_descent.py
├── _glm
│ ├── __init__.py
│ ├── glm.py
│ ├── link.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── test_glm.py
│ │ └── test_link.py
├── _huber.py
├── _least_angle.py
├── _linear_loss.py
├── _logistic.py
├── _omp.py
├── _passive_aggressive.py
├── _perceptron.py
├── _quantile.py
├── _ransac.py
├── _ridge.py
├── _sag.py
├── _sag_fast.pyx.tp
├── _sgd_fast.pxd
├── _sgd_fast.pyx
├── _sgd_fast_helpers.h
├── _stochastic_gradient.py
├── _theil_sen.py
├── setup.py
└── tests
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_bayes.py
│ ├── test_common.py
│ ├── test_coordinate_descent.py
│ ├── test_huber.py
│ ├── test_least_angle.py
│ ├── test_linear_loss.py
│ ├── test_logistic.py
│ ├── test_omp.py
│ ├── test_passive_aggressive.py
│ ├── test_perceptron.py
│ ├── test_quantile.py
│ ├── test_ransac.py
│ ├── test_ridge.py
│ ├── test_sag.py
│ ├── test_sgd.py
│ ├── test_sparse_coordinate_descent.py
│ └── test_theil_sen.py
├── manifold
├── __init__.py
├── _barnes_hut_tsne.pyx
├── _isomap.py
├── _locally_linear.py
├── _mds.py
├── _spectral_embedding.py
├── _t_sne.py
├── _utils.pyx
├── setup.py
└── tests
│ ├── __init__.py
│ ├── test_isomap.py
│ ├── test_locally_linear.py
│ ├── test_mds.py
│ ├── test_spectral_embedding.py
│ └── test_t_sne.py
├── metrics
├── __init__.py
├── _base.py
├── _classification.py
├── _dist_metrics.pxd
├── _dist_metrics.pyx
├── _pairwise_distances_reduction.pyx
├── _pairwise_fast.pyx
├── _plot
│ ├── __init__.py
│ ├── base.py
│ ├── confusion_matrix.py
│ ├── det_curve.py
│ ├── precision_recall_curve.py
│ ├── roc_curve.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── test_base.py
│ │ ├── test_common_curve_display.py
│ │ ├── test_confusion_matrix_display.py
│ │ ├── test_det_curve_display.py
│ │ ├── test_plot_confusion_matrix.py
│ │ ├── test_plot_curve_common.py
│ │ ├── test_plot_det_curve.py
│ │ ├── test_plot_precision_recall.py
│ │ ├── test_plot_roc_curve.py
│ │ ├── test_precision_recall_display.py
│ │ └── test_roc_curve_display.py
├── _ranking.py
├── _regression.py
├── _scorer.py
├── cluster
│ ├── __init__.py
│ ├── _bicluster.py
│ ├── _expected_mutual_info_fast.pyx
│ ├── _supervised.py
│ ├── _unsupervised.py
│ ├── setup.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── test_bicluster.py
│ │ ├── test_common.py
│ │ ├── test_supervised.py
│ │ └── test_unsupervised.py
├── pairwise.py
├── setup.py
└── tests
│ ├── __init__.py
│ ├── test_classification.py
│ ├── test_common.py
│ ├── test_dist_metrics.py
│ ├── test_pairwise.py
│ ├── test_pairwise_distances_reduction.py
│ ├── test_ranking.py
│ ├── test_regression.py
│ └── test_score_objects.py
├── mixture
├── __init__.py
├── _base.py
├── _bayesian_mixture.py
├── _gaussian_mixture.py
└── tests
│ ├── __init__.py
│ ├── test_bayesian_mixture.py
│ ├── test_gaussian_mixture.py
│ └── test_mixture.py
├── model_selection
├── __init__.py
├── _search.py
├── _search_successive_halving.py
├── _split.py
├── _validation.py
└── tests
│ ├── __init__.py
│ ├── common.py
│ ├── test_search.py
│ ├── test_split.py
│ ├── test_successive_halving.py
│ └── test_validation.py
├── multiclass.py
├── multioutput.py
├── naive_bayes.py
├── neighbors
├── __init__.py
├── _ball_tree.pyx
├── _base.py
├── _binary_tree.pxi
├── _classification.py
├── _distance_metric.py
├── _graph.py
├── _kd_tree.pyx
├── _kde.py
├── _lof.py
├── _nca.py
├── _nearest_centroid.py
├── _partition_nodes.pxd
├── _partition_nodes.pyx
├── _quad_tree.pxd
├── _quad_tree.pyx
├── _regression.py
├── _unsupervised.py
├── setup.py
└── tests
│ ├── __init__.py
│ ├── test_ball_tree.py
│ ├── test_graph.py
│ ├── test_kd_tree.py
│ ├── test_kde.py
│ ├── test_lof.py
│ ├── test_nca.py
│ ├── test_nearest_centroid.py
│ ├── test_neighbors.py
│ ├── test_neighbors_pipeline.py
│ ├── test_neighbors_tree.py
│ └── test_quad_tree.py
├── neural_network
├── __init__.py
├── _base.py
├── _multilayer_perceptron.py
├── _rbm.py
├── _stochastic_optimizers.py
└── tests
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_mlp.py
│ ├── test_rbm.py
│ └── test_stochastic_optimizers.py
├── pipeline.py
├── preprocessing
├── __init__.py
├── _csr_polynomial_expansion.pyx
├── _data.py
├── _discretization.py
├── _encoders.py
├── _function_transformer.py
├── _label.py
├── _polynomial.py
├── setup.py
└── tests
│ ├── __init__.py
│ ├── test_common.py
│ ├── test_data.py
│ ├── test_discretization.py
│ ├── test_encoders.py
│ ├── test_function_transformer.py
│ ├── test_label.py
│ └── test_polynomial.py
├── random_projection.py
├── semi_supervised
├── __init__.py
├── _label_propagation.py
├── _self_training.py
└── tests
│ ├── __init__.py
│ ├── test_label_propagation.py
│ └── test_self_training.py
├── setup.py
├── svm
├── __init__.py
├── _base.py
├── _bounds.py
├── _classes.py
├── _liblinear.pxi
├── _liblinear.pyx
├── _libsvm.pxi
├── _libsvm.pyx
├── _libsvm_sparse.pyx
├── _newrand.pyx
├── setup.py
├── src
│ ├── liblinear
│ │ ├── COPYRIGHT
│ │ ├── _cython_blas_helpers.h
│ │ ├── liblinear_helper.c
│ │ ├── linear.cpp
│ │ ├── linear.h
│ │ ├── tron.cpp
│ │ └── tron.h
│ ├── libsvm
│ │ ├── LIBSVM_CHANGES
│ │ ├── _svm_cython_blas_helpers.h
│ │ ├── libsvm_helper.c
│ │ ├── libsvm_sparse_helper.c
│ │ ├── libsvm_template.cpp
│ │ ├── svm.cpp
│ │ └── svm.h
│ └── newrand
│ │ └── newrand.h
└── tests
│ ├── __init__.py
│ ├── test_bounds.py
│ ├── test_sparse.py
│ └── test_svm.py
├── tests
├── __init__.py
├── test_base.py
├── test_build.py
├── test_calibration.py
├── test_check_build.py
├── test_common.py
├── test_config.py
├── test_discriminant_analysis.py
├── test_docstring_parameters.py
├── test_docstrings.py
├── test_dummy.py
├── test_init.py
├── test_isotonic.py
├── test_kernel_approximation.py
├── test_kernel_ridge.py
├── test_metaestimators.py
├── test_min_dependencies_readme.py
├── test_multiclass.py
├── test_multioutput.py
├── test_naive_bayes.py
├── test_pipeline.py
└── test_random_projection.py
├── tree
├── __init__.py
├── _classes.py
├── _criterion.pxd
├── _criterion.pyx
├── _export.py
├── _reingold_tilford.py
├── _splitter.pxd
├── _splitter.pyx
├── _tree.pxd
├── _tree.pyx
├── _utils.pxd
├── _utils.pyx
├── setup.py
└── tests
│ ├── __init__.py
│ ├── test_export.py
│ ├── test_reingold_tilford.py
│ └── test_tree.py
└── utils
├── __init__.py
├── _arpack.py
├── _bunch.py
├── _cython_blas.pxd
├── _cython_blas.pyx
├── _encode.py
├── _estimator_html_repr.py
├── _fast_dict.pxd
├── _fast_dict.pyx
├── _heap.pxd
├── _heap.pyx
├── _joblib.py
├── _logistic_sigmoid.pyx
├── _mask.py
├── _mocking.py
├── _openmp_helpers.pxd
├── _openmp_helpers.pyx
├── _pprint.py
├── _random.pxd
├── _random.pyx
├── _readonly_array_wrapper.pyx
├── _seq_dataset.pxd.tp
├── _seq_dataset.pyx.tp
├── _show_versions.py
├── _tags.py
├── _testing.py
├── _typedefs.pxd
├── _typedefs.pyx
├── _weight_vector.pxd.tp
├── _weight_vector.pyx.tp
├── arrayfuncs.pyx
├── class_weight.py
├── deprecation.py
├── estimator_checks.py
├── extmath.py
├── fixes.py
├── graph.py
├── metaestimators.py
├── multiclass.py
├── murmurhash.pxd
├── murmurhash.pyx
├── optimize.py
├── random.py
├── setup.py
├── sparsefuncs.py
├── sparsefuncs_fast.pyx
├── src
├── MurmurHash3.cpp
└── MurmurHash3.h
├── stats.py
├── tests
├── __init__.py
├── conftest.py
├── test_arpack.py
├── test_arrayfuncs.py
├── test_class_weight.py
├── test_cython_blas.py
├── test_cython_templating.py
├── test_deprecation.py
├── test_encode.py
├── test_estimator_checks.py
├── test_estimator_html_repr.py
├── test_extmath.py
├── test_fast_dict.py
├── test_fixes.py
├── test_graph.py
├── test_metaestimators.py
├── test_mocking.py
├── test_multiclass.py
├── test_murmurhash.py
├── test_optimize.py
├── test_parallel.py
├── test_pprint.py
├── test_random.py
├── test_readonly_wrapper.py
├── test_seq_dataset.py
├── test_shortest_path.py
├── test_show_versions.py
├── test_sparsefuncs.py
├── test_stats.py
├── test_tags.py
├── test_testing.py
├── test_utils.py
├── test_validation.py
└── test_weight_vector.py
└── validation.py
/.binder/requirements.txt:
--------------------------------------------------------------------------------
1 | --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
2 | --pre
3 | matplotlib
4 | scikit-image
5 | pandas
6 | sphinx-gallery
7 | scikit-learn
8 |
9 |
--------------------------------------------------------------------------------
/.circleci/artifact_path:
--------------------------------------------------------------------------------
1 | 0/doc/_changed.html
2 |
--------------------------------------------------------------------------------
/.codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 |
3 | coverage:
4 | status:
5 | project:
6 | default:
7 | # Commits pushed to main should not make the overall
8 | # project coverage decrease by more than 1%:
9 | target: auto
10 | threshold: 1%
11 | patch:
12 | default:
13 | # Be tolerant on slight code coverage diff on PRs to limit
14 | # noisy red coverage status on github PRs.
15 | # Note: The coverage stats are still uploaded
16 | # to codecov so that PR reviewers can see uncovered lines
17 | target: auto
18 | threshold: 1%
19 |
20 | codecov:
21 | notify:
22 | # Prevent the coverage status from being uploaded multiple times for parallel
23 | # and long-running CI pipelines. This configuration is particularly useful on PRs
24 | # to avoid confusion. Note that this value is set to the number of Azure
25 | # Pipeline jobs uploading coverage reports.
26 | after_n_builds: 6
27 |
28 | ignore:
29 | - "sklearn/externals"
30 | - "sklearn/_build_utils"
31 | - "**/setup.py"
32 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | source = sklearn
4 | parallel = True
5 | omit =
6 | */sklearn/externals/*
7 | */sklearn/_build_utils/*
8 | */benchmarks/*
9 | **/setup.py
10 |
--------------------------------------------------------------------------------
/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | # Since git version 2.23, git-blame has a feature to ignore
2 | # certain commits.
3 | #
4 | # This file contains a list of commits that are not likely what
5 | # you are looking for in `git blame`. You can set this file as
6 | # a default ignore file for blame by running the following
7 | # command.
8 | #
9 | # $ git config blame.ignoreRevsFile .git-blame-ignore-revs
10 |
11 | # PR 18948: Migrate code style to Black
12 | 82df48934eba1df9a1ed3be98aaace8eada59e6e
13 |
14 | # PR 20294: Use target_version >= 3.7 in Black
15 | 351ace7935a4ea685171cc6d174890f08facd561
16 |
17 | # PR 20412: Use experimental_string_processing=true in Black
18 | 3ae7c7615343bbd36acece57825d8b0d70fd9da4
19 |
20 | # PR 20502: Runs Black on examples
21 | 70a185ae59b4362633d18b0d0083abb1b6f7370c
22 |
23 | # PR 22474: Update to Black 22.1.0
24 | 1fc86b6aacd89da44a3b4e8abf7c3e2ba4336ffe
25 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | /doc/whats_new/v*.rst merge=union
2 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: ['https://numfocus.org/donate-to-scikit-learn']
13 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | contact_links:
3 | - name: Discussions
4 | url: https://github.com/scikit-learn/scikit-learn/discussions/new
5 | about: Ask questions and discuss with other scikit-learn community members
6 | - name: Stack Overflow
7 | url: https://stackoverflow.com/questions/tagged/scikit-learn
8 | about: Please ask and answer usage questions on Stack Overflow
9 | - name: Mailing list
10 | url: https://mail.python.org/mailman/listinfo/scikit-learn
11 | about: General discussions and announcements on the mailing list
12 | - name: Gitter
13 | url: https://gitter.im/scikit-learn/scikit-learn
14 | about: Users and developers can sometimes be found on the gitter channel
15 | - name: Blank issue
16 | url: https://github.com/scikit-learn/scikit-learn/issues/new
17 | about: Please note that Github Discussions should be used in most cases instead
18 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/doc_improvement.yml:
--------------------------------------------------------------------------------
1 | name: Documentation improvement
2 | description: Create a report to help us improve the documentation. Alternatively you can just open a pull request with the suggested change.
3 | labels: [Documentation, 'Needs Triage']
4 |
5 | body:
6 | - type: textarea
7 | attributes:
8 | label: Describe the issue linked to the documentation
9 | description: >
10 | Tell us about the confusion introduced in the documentation.
11 | validations:
12 | required: true
13 | - type: textarea
14 | attributes:
15 | label: Suggest a potential alternative/fix
16 | description: >
17 | Tell us how we could improve the documentation in this regard.
18 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
1 | name: Feature request
2 | description: Suggest a new algorithm, enhancement to an existing algorithm, etc.
3 | labels: ['New Feature', 'Needs Triage']
4 |
5 | body:
6 | - type: markdown
7 | attributes:
8 | value: >
9 | #### If you want to propose a new algorithm, please refer first to the [scikit-learn inclusion criterion](https://scikit-learn.org/stable/faq.html#what-are-the-inclusion-criteria-for-new-algorithms).
10 | - type: textarea
11 | attributes:
12 | label: Describe the workflow you want to enable
13 | validations:
14 | required: true
15 | - type: textarea
16 | attributes:
17 | label: Describe your proposed solution
18 | validations:
19 | required: true
20 | - type: textarea
21 | attributes:
22 | label: Describe alternatives you've considered, if relevant
23 | - type: textarea
24 | attributes:
25 | label: Additional context
26 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
5 |
6 | #### Reference Issues/PRs
7 |
13 |
14 |
15 | #### What does this implement/fix? Explain your changes.
16 |
17 |
18 | #### Any other comments?
19 |
20 |
21 |
33 |
--------------------------------------------------------------------------------
/.github/labeler-file-extensions.yml:
--------------------------------------------------------------------------------
1 | cython:
2 | - sklearn/**/*.pyx
3 | - sklearn/**/*.pxd
4 | - sklearn/**/*.pxi
5 | # Tempita templates
6 | - sklearn/**/*.pyx.tp
7 | - sklearn/**/*.pxd.tp
8 | - sklearn/**/*.pxi.tp
9 |
--------------------------------------------------------------------------------
/.github/scripts/label_title_regex.py:
--------------------------------------------------------------------------------
1 | """Labels PRs based on title. Must be run in a github action with the
2 | pull_request_target event."""
3 | from github import Github
4 | import os
5 | import json
6 | import re
7 |
8 | context_dict = json.loads(os.getenv("CONTEXT_GITHUB"))
9 |
10 | repo = context_dict["repository"]
11 | g = Github(context_dict["token"])
12 | repo = g.get_repo(repo)
13 | pr_number = context_dict["event"]["number"]
14 | issue = repo.get_issue(number=pr_number)
15 | title = issue.title
16 |
17 |
18 | regex_to_labels = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")]
19 |
20 | labels_to_add = [label for regex, label in regex_to_labels if re.search(regex, title)]
21 |
22 | if labels_to_add:
23 | issue.add_to_labels(*labels_to_add)
24 |
--------------------------------------------------------------------------------
/.github/workflows/assign.yml:
--------------------------------------------------------------------------------
1 |
2 | name: Assign
3 | on:
4 | issue_comment:
5 | types: created
6 |
7 | jobs:
8 | one:
9 | runs-on: ubuntu-latest
10 | # Note that string comparisons are not case sensitive.
11 | if: >-
12 | startsWith(github.event.comment.body, '/take')
13 | && !github.event.issue.assignee
14 | steps:
15 | - run: |
16 | echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
17 | curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
18 | curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X "DELETE" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels/help%20wanted
19 |
--------------------------------------------------------------------------------
/.github/workflows/check-manifest.yml:
--------------------------------------------------------------------------------
1 | name: "Check Manifest"
2 |
3 | on:
4 | schedule:
5 | - cron: '0 0 * * *'
6 |
7 | jobs:
8 | check-manifest:
9 | # Don't run on forks
10 | if: github.repository == 'scikit-learn/scikit-learn'
11 |
12 | runs-on: ubuntu-latest
13 | steps:
14 | - uses: actions/checkout@v2
15 | - uses: actions/setup-python@v2
16 | with:
17 | python-version: '3.9'
18 | - name: Install dependencies
19 | # scipy and cython are required to build sdist
20 | run: |
21 | python -m pip install --upgrade pip
22 | pip install check-manifest scipy cython
23 | - run: |
24 | check-manifest -v
25 |
26 | update-tracker:
27 | uses: ./.github/workflows/update_tracking_issue.yml
28 | if: ${{ always() }}
29 | needs: [check-manifest]
30 | with:
31 | job_status: ${{ needs.check-manifest.result }}
32 | secrets:
33 | BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
34 |
--------------------------------------------------------------------------------
/.github/workflows/label-blank-issue.yml:
--------------------------------------------------------------------------------
1 | name: Labels Blank issues
2 |
3 | on:
4 | issues:
5 | types: [opened]
6 |
7 | jobs:
8 | label-blank-issues:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: andymckay/labeler@1.0.4
12 | with:
13 | add-labels: "Needs Triage"
14 | ignore-if-labeled: true
15 |
--------------------------------------------------------------------------------
/.github/workflows/labeler-module.yml:
--------------------------------------------------------------------------------
1 | name: "Pull Request Labeler"
2 | on:
3 | pull_request_target:
4 | types: [opened]
5 |
6 | jobs:
7 | triage:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - uses: thomasjpfan/labeler@v2.5.0
11 | continue-on-error: true
12 | if: github.repository == 'scikit-learn/scikit-learn'
13 | with:
14 | repo-token: "${{ secrets.GITHUB_TOKEN }}"
15 | max-labels: "3"
16 | configuration-path: ".github/labeler-module.yml"
17 |
18 | triage_file_extensions:
19 | runs-on: ubuntu-latest
20 | steps:
21 | - uses: thomasjpfan/labeler@v2.5.0
22 | continue-on-error: true
23 | if: github.repository == 'scikit-learn/scikit-learn'
24 | with:
25 | repo-token: "${{ secrets.GITHUB_TOKEN }}"
26 | configuration-path: ".github/labeler-file-extensions.yml"
27 |
--------------------------------------------------------------------------------
/.github/workflows/labeler-title-regex.yml:
--------------------------------------------------------------------------------
1 | name: Pull Request Regex Title Labeler
2 | on:
3 | pull_request_target:
4 | types: [opened, edited]
5 |
6 | permissions:
7 | contents: read
8 | pull-requests: write
9 |
10 | jobs:
11 |
12 | labeler:
13 | runs-on: ubuntu-20.04
14 | steps:
15 | - uses: actions/checkout@v2
16 | - uses: actions/setup-python@v2
17 | with:
18 | python-version: '3.9'
19 | - name: Install PyGithub
20 | run: pip install -Uq PyGithub
21 | - name: Label pull request
22 | run: python .github/scripts/label_title_regex.py
23 | env:
24 | CONTEXT_GITHUB: ${{ toJson(github) }}
25 |
--------------------------------------------------------------------------------
/.github/workflows/twitter.yml:
--------------------------------------------------------------------------------
1 | # Tweet the URL of a commit on @sklearn_commits whenever a push event
2 | # happens on the main branch
3 | name: Twitter Push Notification
4 |
5 |
6 | on:
7 | push:
8 | branches:
9 | - main
10 |
11 |
12 | jobs:
13 | tweet:
14 | name: Twitter Notification
15 | runs-on: ubuntu-latest
16 | steps:
17 | - name: Tweet URL of last commit as @sklearn_commits
18 | if: github.repository == 'scikit-learn/scikit-learn'
19 | uses: docker://thomasjpfan/twitter-action:0.3
20 | with:
21 | args: "-message \"https://github.com/scikit-learn/scikit-learn/commit/${{ github.sha }}\""
22 | env:
23 | TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
24 | TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
25 | TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
26 | TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }}
27 |
--------------------------------------------------------------------------------
/.github/workflows/unassign.yml:
--------------------------------------------------------------------------------
1 | name: Unassign
2 | # Runs when a contributor has unassigned themselves from the issue; adds the 'help wanted' label
3 | on:
4 | issues:
5 | types: unassigned
6 |
7 | jobs:
8 | one:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - name:
12 | if: github.event.issue.state == 'open'
13 | run: |
14 | echo "Marking issue ${{ github.event.issue.number }} as help wanted"
15 | curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"labels": ["help wanted"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels
16 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v2.3.0
4 | hooks:
5 | - id: check-yaml
6 | - id: end-of-file-fixer
7 | - id: trailing-whitespace
8 | - repo: https://github.com/psf/black
9 | rev: 22.1.0
10 | hooks:
11 | - id: black
12 | - repo: https://gitlab.com/pycqa/flake8
13 | rev: 3.9.2
14 | hooks:
15 | - id: flake8
16 | types: [file, python]
17 | - repo: https://github.com/pre-commit/mirrors-mypy
18 | rev: v0.782
19 | hooks:
20 | - id: mypy
21 | files: sklearn/
22 | additional_dependencies: [pytest==6.2.4]
23 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | We are a community based on openness, as well as friendly and didactic discussions.
4 |
5 | We aspire to treat everybody equally, and value their contributions.
6 |
7 | Decisions are made based on technical merit and consensus.
8 |
9 | Code is not the only way to help the project. Reviewing pull requests,
10 | answering questions to help others on mailing lists or issues, organizing and
11 | teaching tutorials, working on the website, improving the documentation, are
12 | all priceless contributions.
13 |
14 | We abide by the principles of openness, respect, and consideration of others of
15 | the Python Software Foundation: https://www.python.org/psf/codeofconduct/
16 |
17 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.rst
2 | recursive-include doc *
3 | recursive-include examples *
4 | recursive-include sklearn *.c *.h *.pyx *.pxd *.pxi *.tp
5 | recursive-include sklearn/datasets *.csv *.csv.gz *.rst *.jpg *.txt *.arff.gz *.json.gz
6 | include COPYING
7 | include README.rst
8 | include pyproject.toml
9 | include sklearn/externals/README
10 | include sklearn/svm/src/liblinear/COPYRIGHT
11 | include sklearn/svm/src/libsvm/LIBSVM_CHANGES
12 | include conftest.py
13 | include Makefile
14 | include MANIFEST.in
15 | include .coveragerc
16 |
17 | # exclude from sdist
18 | recursive-exclude asv_benchmarks *
19 | recursive-exclude benchmarks *
20 | recursive-exclude build_tools *
21 | recursive-exclude maint_tools *
22 | recursive-exclude benchmarks *
23 | recursive-exclude .binder *
24 | recursive-exclude .circleci *
25 | exclude .codecov.yml
26 | exclude .git-blame-ignore-revs
27 | exclude .mailmap
28 | exclude .pre-commit-config.yaml
29 | exclude azure-pipelines.yml
30 | exclude lgtm.yml
31 | exclude CODE_OF_CONDUCT.md
32 | exclude CONTRIBUTING.md
33 | exclude SECURITY.md
34 | exclude PULL_REQUEST_TEMPLATE.md
35 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | | Version | Supported |
6 | | --------- | ------------------ |
7 | | 1.0.1 | :white_check_mark: |
8 | | < 1.0.1 | :x: |
9 |
10 | ## Reporting a Vulnerability
11 |
12 | Please report security vulnerabilities by email to `security@scikit-learn.org`.
13 | This email is an alias to a subset of the scikit-learn maintainers' team.
14 |
15 | If the security vulnerability is accepted, a patch will be crafted privately
16 | in order to prepare a dedicated bugfix release as timely as possible (depending
17 | on the complexity of the fix).
18 |
--------------------------------------------------------------------------------
/asv_benchmarks/.gitignore:
--------------------------------------------------------------------------------
1 | *__pycache__*
2 | env/
3 | html/
4 | results/
5 | scikit-learn/
6 | benchmarks/cache/
7 |
--------------------------------------------------------------------------------
/asv_benchmarks/benchmarks/__init__.py:
--------------------------------------------------------------------------------
1 | """Benchmark suite for scikit-learn using ASV"""
2 |
--------------------------------------------------------------------------------
/asv_benchmarks/benchmarks/manifold.py:
--------------------------------------------------------------------------------
1 | from sklearn.manifold import TSNE
2 |
3 | from .common import Benchmark, Estimator
4 | from .datasets import _digits_dataset
5 |
6 |
7 | class TSNEBenchmark(Estimator, Benchmark):
8 | """
9 | Benchmarks for t-SNE.
10 | """
11 |
12 | param_names = ["method"]
13 | params = (["exact", "barnes_hut"],)
14 |
15 | def setup_cache(self):
16 | super().setup_cache()
17 |
18 | def make_data(self, params):
19 | (method,) = params
20 |
21 | n_samples = 500 if method == "exact" else None
22 |
23 | return _digits_dataset(n_samples=n_samples)
24 |
25 | def make_estimator(self, params):
26 | (method,) = params
27 |
28 | estimator = TSNE(random_state=0, method=method)
29 |
30 | return estimator
31 |
32 | def make_scorers(self):
33 | self.train_scorer = lambda _, __: self.estimator.kl_divergence_
34 | self.test_scorer = lambda _, __: self.estimator.kl_divergence_
35 |
--------------------------------------------------------------------------------
/asv_benchmarks/benchmarks/svm.py:
--------------------------------------------------------------------------------
1 | from sklearn.svm import SVC
2 |
3 | from .common import Benchmark, Estimator, Predictor
4 | from .datasets import _synth_classification_dataset
5 | from .utils import make_gen_classif_scorers
6 |
7 |
8 | class SVCBenchmark(Predictor, Estimator, Benchmark):
9 | """Benchmarks for SVC."""
10 |
11 | param_names = ["kernel"]
12 | params = (["linear", "poly", "rbf", "sigmoid"],)
13 |
14 | def setup_cache(self):
15 | super().setup_cache()
16 |
17 | def make_data(self, params):
18 | return _synth_classification_dataset()
19 |
20 | def make_estimator(self, params):
21 | (kernel,) = params
22 |
23 | estimator = SVC(
24 | max_iter=100, tol=1e-16, kernel=kernel, random_state=0, gamma="scale"
25 | )
26 |
27 | return estimator
28 |
29 | def make_scorers(self):
30 | make_gen_classif_scorers(self)
31 |
--------------------------------------------------------------------------------
/benchmarks/.gitignore:
--------------------------------------------------------------------------------
1 | /bhtsne
2 | *.npy
3 | *.json
4 | /mnist_tsne_output/
5 |
--------------------------------------------------------------------------------
/benchmarks/plot_tsne_mnist.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import os.path as op
4 |
5 | import argparse
6 |
7 |
8 | LOG_DIR = "mnist_tsne_output"
9 |
10 |
11 | if __name__ == "__main__":
12 | parser = argparse.ArgumentParser("Plot benchmark results for t-SNE")
13 | parser.add_argument(
14 | "--labels",
15 | type=str,
16 | default=op.join(LOG_DIR, "mnist_original_labels_10000.npy"),
17 | help="1D integer numpy array for labels",
18 | )
19 | parser.add_argument(
20 | "--embedding",
21 | type=str,
22 | default=op.join(LOG_DIR, "mnist_sklearn_TSNE_10000.npy"),
23 | help="2D float numpy array for embedded data",
24 | )
25 | args = parser.parse_args()
26 |
27 | X = np.load(args.embedding)
28 | y = np.load(args.labels)
29 |
30 | for i in np.unique(y):
31 | mask = y == i
32 | plt.scatter(X[mask, 0], X[mask, 1], alpha=0.2, label=int(i))
33 | plt.legend(loc="best")
34 | plt.show()
35 |
--------------------------------------------------------------------------------
/build_tools/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for maintenance tools
2 |
3 | authors:
4 | python generate_authors_table.py
5 |
--------------------------------------------------------------------------------
/build_tools/azure/install_win.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 | set -x
5 |
6 | if [[ "$PYTHON_ARCH" == "64" ]]; then
7 | conda create -n $VIRTUALENV -q -y python=$PYTHON_VERSION numpy scipy cython matplotlib wheel pillow joblib
8 |
9 | source activate $VIRTUALENV
10 |
11 | pip install threadpoolctl
12 |
13 | if [[ "$PYTEST_VERSION" == "*" ]]; then
14 | pip install pytest
15 | else
16 | pip install pytest==$PYTEST_VERSION
17 | fi
18 | else
19 | pip install numpy scipy cython pytest wheel pillow joblib threadpoolctl
20 | fi
21 |
22 | if [[ "$PYTEST_XDIST_VERSION" != "none" ]]; then
23 | pip install pytest-xdist
24 | fi
25 |
26 | if [[ "$COVERAGE" == "true" ]]; then
27 | # XXX: coverage is temporarily pinned to 6.2 because 6.3 is not fork-safe
28 | # cf. https://github.com/nedbat/coveragepy/issues/1310
29 | pip install coverage codecov pytest-cov coverage==6.2
30 | fi
31 |
32 | python --version
33 | pip --version
34 |
35 | # Build scikit-learn
36 | python setup.py bdist_wheel
37 |
38 | # Install the generated wheel package to test it
39 | pip install --pre --no-index --find-links dist scikit-learn
40 |
--------------------------------------------------------------------------------
/build_tools/azure/test_docs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | if [[ "$DISTRIB" =~ ^conda.* ]]; then
6 | source activate $VIRTUALENV
7 | elif [[ "$DISTRIB" == "ubuntu" ]]; then
8 | source $VIRTUALENV/bin/activate
9 | fi
10 |
11 | if [[ "$BUILD_WITH_ICC" == "true" ]]; then
12 | source /opt/intel/oneapi/setvars.sh
13 | fi
14 |
15 | make test-doc
16 |
--------------------------------------------------------------------------------
/build_tools/azure/test_pytest_soft_dependency.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | # called when DISTRIB=="conda"
6 | source activate $VIRTUALENV
7 | conda remove -y py pytest || pip uninstall -y py pytest
8 |
9 | if [[ "$COVERAGE" == "true" ]]; then
10 | # conda may remove coverage when uninstalling pytest and py
11 | pip install coverage
12 | # Need to append the coverage to the existing .coverage generated by
13 | # running the tests. Make sure to reuse the same coverage
14 | # configuration as the one used by the main pytest run to be
15 | # able to combine the results.
16 | CMD="coverage run --rcfile=$BUILD_SOURCESDIRECTORY/.coveragerc"
17 | else
18 | CMD="python"
19 | fi
20 |
21 | # .coverage from running the tests is in TEST_DIR
22 | pushd $TEST_DIR
23 | $CMD -m sklearn.utils.tests.test_estimator_checks
24 | popd
25 |
--------------------------------------------------------------------------------
/build_tools/azure/upload_codecov.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Combine the coverage data found in TEST_DIR and upload it with codecov.
3 | set -e  # abort on the first failing command
4 |
5 | # called when COVERAGE=="true" and DISTRIB=="conda"
6 | export PATH=$HOME/miniconda3/bin:$PATH
7 | source activate $VIRTUALENV
8 |
9 | # Need to run codecov from a git checkout, so we copy .coverage
10 | # from TEST_DIR where pytest has been run
11 | pushd $TEST_DIR
12 | coverage combine --append
13 | popd
14 | cp $TEST_DIR/.coverage $BUILD_REPOSITORY_LOCALPATH
15 |
16 | codecov --root $BUILD_REPOSITORY_LOCALPATH -t $CODECOV_TOKEN || echo "codecov upload failed"  # an upload failure is reported but does not fail the script
17 |
--------------------------------------------------------------------------------
/build_tools/circle/checkout_merge_commit.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Fetch main (and, for pull requests, the PR head and merge refs), then
3 | # check out the PR merge ref and verify that main and the PR head are merged into it.
4 | # Add `main` branch to the update list.
5 | # Otherwise CircleCI will give us a cached one.
6 | FETCH_REFS="+main:main"
7 |
8 | # Update PR refs for testing.
9 | if [[ -n "${CIRCLE_PR_NUMBER}" ]]
10 | then
11 | FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/head:pr/${CIRCLE_PR_NUMBER}/head"
12 | FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/merge:pr/${CIRCLE_PR_NUMBER}/merge"
13 | fi
14 |
15 | # Retrieve the refs.
16 | git fetch -u origin ${FETCH_REFS}  # left unquoted so that each refspec becomes a separate argument
17 |
18 | # Checkout the PR merge ref.
19 | if [[ -n "${CIRCLE_PR_NUMBER}" ]]
20 | then
21 | git checkout -qf "pr/${CIRCLE_PR_NUMBER}/merge" || (
22 | echo Could not fetch merge commit. >&2
23 | echo There may be conflicts in merging PR \#${CIRCLE_PR_NUMBER} with main. >&2;
24 | exit 1)
25 | fi
26 |
27 | # Check for merge conflicts: main and the PR head should both be listed as merged into the checked-out commit.
28 | if [[ -n "${CIRCLE_PR_NUMBER}" ]]
29 | then
30 | git branch --merged | grep main > /dev/null  # NOTE(review): no `set -e` in this script, so a failure of this grep is ignored; only the next check sets the exit status
31 | git branch --merged | grep "pr/${CIRCLE_PR_NUMBER}/head" > /dev/null
32 | fi
33 |
--------------------------------------------------------------------------------
/build_tools/codespell_ignore_words.txt:
--------------------------------------------------------------------------------
1 | aggresive
2 | aline
3 | ba
4 | basf
5 | boun
6 | bre
7 | cach
8 | complies
9 | coo
10 | copys
11 | deine
12 | didi
13 | feld
14 | fo
15 | fpr
16 | fro
17 | fwe
18 | gool
19 | hart
20 | hist
21 | ines
22 | inout
23 | ist
24 | jaques
25 | linke
26 | lod
27 | mape
28 | mor
29 | nd
30 | nmae
31 | ocur
32 | pullrequest
33 | ro
34 | soler
35 | suh
36 | suprised
37 | te
38 | technic
39 | teh
40 | thi
41 | usal
42 | vie
43 | wan
44 | winn
45 | yau
46 |
--------------------------------------------------------------------------------
/build_tools/github/Windows:
--------------------------------------------------------------------------------
1 | # Get the Python version of the base image from a build argument
2 | ARG PYTHON_VERSION
3 | FROM winamd64/python:$PYTHON_VERSION-windowsservercore
4 | # Build arguments used by the instructions below
5 | ARG WHEEL_NAME
6 | ARG CONFTEST_NAME
7 | ARG CIBW_TEST_REQUIRES
8 |
9 | # Copy the Windows wheel and the conftest file into the image, then install the wheel
10 | COPY $WHEEL_NAME $WHEEL_NAME
11 | COPY $CONFTEST_NAME $CONFTEST_NAME
12 | RUN pip install $env:WHEEL_NAME
13 |
14 | # Install the testing dependencies (the space-separated list is split into one argument per package)
15 | RUN pip install $env:CIBW_TEST_REQUIRES.split(" ")
16 |
--------------------------------------------------------------------------------
/build_tools/github/build_minimal_windows_image.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Build the "scikit-learn/minimal-windows" Docker image used to test a repaired Windows wheel.
3 | set -e
4 | set -x
5 |
6 | PYTHON_VERSION=$1  # Python version without the dot; it is re-dotted further down
7 | BITNESS=$2
8 |
9 | if [[ "$BITNESS" == "32" ]]; then
10 | # 32-bit architectures are not supported
11 | # by the official Docker images: Tests will just be run
12 | # on the host (instead of the minimal Docker container).
13 | exit 0
14 | fi
15 |
16 | TEMP_FOLDER="$HOME/AppData/Local/Temp"
17 | WHEEL_PATH=$(ls -d $TEMP_FOLDER/*/repaired_wheel/*)  # assumes exactly one repaired wheel is present -- TODO confirm
18 | WHEEL_NAME=$(basename $WHEEL_PATH)
19 |
20 | cp $WHEEL_PATH $WHEEL_NAME  # copy the wheel into the current directory, which is the Docker build context
21 |
22 | # Dot the Python version for identifying the base Docker image
23 | PYTHON_VERSION=$(echo ${PYTHON_VERSION:0:1}.${PYTHON_VERSION:1:2})
24 |
25 | # Build a minimal Windows Docker image for testing the wheels
26 | docker build --build-arg PYTHON_VERSION=$PYTHON_VERSION \
27 | --build-arg WHEEL_NAME=$WHEEL_NAME \
28 | --build-arg CONFTEST_NAME=$CONFTEST_NAME \
29 | --build-arg CIBW_TEST_REQUIRES="$CIBW_TEST_REQUIRES" \
30 | -f build_tools/github/Windows \
31 | -t scikit-learn/minimal-windows .
32 |
--------------------------------------------------------------------------------
/build_tools/github/build_source.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Build the source distribution in a fresh virtual environment and check it with twine.
3 | set -e
4 | set -x
5 |
6 | # Move up two levels to create the virtual
7 | # environment outside of the source folder
8 | cd ../../
9 |
10 | python -m venv build_env
11 | source build_env/bin/activate
12 |
13 | python -m pip install numpy scipy cython
14 | python -m pip install twine
15 |
16 | cd scikit-learn/scikit-learn  # go back into the source folder
17 | python setup.py sdist
18 |
19 | # Check whether the source distribution will render correctly
20 | twine check dist/*.tar.gz
21 |
--------------------------------------------------------------------------------
/build_tools/github/check_build_trigger.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Decide whether the wheel-building jobs should run and expose the decision
3 | # as the "build" output of the calling workflow step.
4 |
5 | set -e
6 | set -x
7 |
8 | # Abbreviated hash and subject of the most recent non-merge commit
9 | COMMIT_MSG=$(git log --no-merges -1 --oneline)
10 |
11 | # The commit marker "[cd build]" or "[cd build gh]" will trigger the build when required
12 | if [[ "$GITHUB_EVENT_NAME" == schedule ||
13 |       "$COMMIT_MSG" =~ \[cd\ build\] ||
14 |       "$COMMIT_MSG" =~ \[cd\ build\ gh\] ]]; then
15 |     if [[ -n "$GITHUB_OUTPUT" ]]; then
16 |         # Preferred mechanism: append the output to the file named by GITHUB_OUTPUT
17 |         echo "build=true" >> "$GITHUB_OUTPUT"
18 |     else
19 |         # Fallback for runners that only support the deprecated workflow command
20 |         echo "::set-output name=build::true"
21 |     fi
22 | fi
14 |
--------------------------------------------------------------------------------
/build_tools/github/check_wheels.py:
--------------------------------------------------------------------------------
1 | """Checks that dist/* contains the number of wheels built from the
2 | .github/workflows/wheels.yml config."""
3 | import yaml
4 | from pathlib import Path
5 | import sys
6 |
7 | gh_wheel_path = Path.cwd() / ".github" / "workflows" / "wheels.yml"
8 | with gh_wheel_path.open("r") as f:
9 | wheel_config = yaml.safe_load(f)
10 |
11 | build_matrix = wheel_config["jobs"]["build_wheels"]["strategy"]["matrix"]["include"]
12 | n_wheels = len(build_matrix)
13 |
14 | # plus one more for the sdist
15 | n_wheels += 1
16 |
17 | # aarch64 builds from travis
18 | travis_config_path = Path.cwd() / ".travis.yml"
19 | with travis_config_path.open("r") as f:
20 | travis_config = yaml.safe_load(f)
21 |
22 | jobs = travis_config["jobs"]["include"]
23 | travis_builds = [j for j in jobs if any("CIBW_BUILD" in env for env in j["env"])]
24 | n_wheels += len(travis_builds)
25 |
26 | dist_files = list(Path("dist").glob("**/*"))
27 | n_dist_files = len(dist_files)
28 |
29 | if n_dist_files != n_wheels:
30 | print(
31 | f"Expected {n_wheels} wheels in dist/* but "
32 | f"got {n_dist_files} artifacts instead."
33 | )
34 | sys.exit(1)
35 |
36 | print(f"dist/* has the expected {n_wheels} wheels:")
37 | print("\n".join(file.name for file in dist_files))
38 |
--------------------------------------------------------------------------------
/build_tools/github/repair_windows_wheels.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Unpack a Windows wheel, run build_tools/github/vendor.py on it and repack it into DEST_DIR.
3 | set -e
4 | set -x
5 |
6 | WHEEL=$1  # wheel file to process
7 | DEST_DIR=$2  # directory that receives the repacked wheel
8 | BITNESS=$3  # forwarded unchanged to vendor.py
9 |
10 | # By default, the Windows wheels are not repaired.
11 | # In this case, we need to vendor VCRUNTIME140.dll
12 | wheel unpack "$WHEEL"
13 | WHEEL_DIRNAME=$(ls -d scikit_learn-*)  # directory created by the unpack step
14 | python build_tools/github/vendor.py "$WHEEL_DIRNAME" "$BITNESS"
15 | wheel pack "$WHEEL_DIRNAME" -d "$DEST_DIR"
16 | rm -rf "$WHEEL_DIRNAME"  # remove the unpacked copy once the wheel is repacked
17 |
--------------------------------------------------------------------------------
/build_tools/github/test_source.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Install the built source distribution in a fresh virtual environment and run the test suite on it.
3 | set -e
4 | set -x
5 |
6 | cd ../../  # create the virtual environment two levels above the current directory
7 |
8 | python -m venv test_env
9 | source test_env/bin/activate
10 |
11 | python -m pip install scikit-learn/scikit-learn/dist/*.tar.gz
12 | python -m pip install pytest pandas
13 |
14 | # Run the tests on the installed source distribution
15 | mkdir tmp_for_test
16 | cp scikit-learn/scikit-learn/conftest.py tmp_for_test
17 | cd tmp_for_test
18 |
19 | pytest --pyargs sklearn
20 |
--------------------------------------------------------------------------------
/build_tools/github/test_wheels.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Test an installed scikit-learn: print the version information, then run the test suite.
3 | set -e
4 | set -x
5 |
6 | if [[ "$OSTYPE" != "linux-gnu" ]]; then
7 | # The Linux test environment is run in a Docker container and
8 | # it is not possible to copy the test configuration file (yet)
9 | cp $CONFTEST_PATH $CONFTEST_NAME
10 | fi
11 |
12 | # Test that there are no links to system libraries in the
13 | # threadpoolctl output section of the show_versions output:
14 | python -c "import sklearn; sklearn.show_versions()"
15 | pytest --pyargs sklearn
16 |
--------------------------------------------------------------------------------
/build_tools/github/test_windows_wheels.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Test a Windows wheel: on the host for 32-bit builds, otherwise in the minimal Docker image.
3 | set -e
4 | set -x
5 |
6 | PYTHON_VERSION=$1  # not used in the rest of this script
7 | BITNESS=$2
8 |
9 | if [[ "$BITNESS" == "32" ]]; then
10 | # 32-bit architectures use the regular
11 | # test command (outside of the minimal Docker container)
12 | cp $CONFTEST_PATH $CONFTEST_NAME
13 | python -c "import sklearn; sklearn.show_versions()"
14 | pytest --pyargs sklearn
15 | else
16 | docker container run \
17 | --rm scikit-learn/minimal-windows \
18 | powershell -Command "python -c 'import sklearn; sklearn.show_versions()'"
19 | # Run the test suite in the container with the environment variables passed via -e
20 | docker container run \
21 | -e SKLEARN_SKIP_NETWORK_TESTS=1 \
22 | -e OMP_NUM_THREADS=2 \
23 | -e OPENBLAS_NUM_THREADS=2 \
24 | --rm scikit-learn/minimal-windows \
25 | powershell -Command "pytest --pyargs sklearn"
26 | fi
27 |
--------------------------------------------------------------------------------
/build_tools/github/upload_anaconda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Upload the artifacts in dist/artifact/ to an Anaconda organisation chosen from the triggering event.
3 | set -e
4 | set -x
5 |
6 | if [ "$GITHUB_EVENT_NAME" == "schedule" ]; then  # scheduled runs upload to the nightly organisation
7 | ANACONDA_ORG="scipy-wheels-nightly"
8 | ANACONDA_TOKEN="$SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN"
9 | else
10 | ANACONDA_ORG="scikit-learn-wheels-staging"
11 | ANACONDA_TOKEN="$SCIKIT_LEARN_STAGING_UPLOAD_TOKEN"
12 | fi
13 |
14 | # Install Python 3.8 because of a bug with Python 3.9
15 | export PATH=$CONDA/bin:$PATH
16 | conda create -n upload -y python=3.8
17 | source activate upload
18 | conda install -y anaconda-client
19 |
20 | # Force a replacement if the remote file already exists
21 | anaconda -t $ANACONDA_TOKEN upload --force -u $ANACONDA_ORG dist/artifact/*  # NOTE(review): `set -x` traces this command with the token expanded; presumably the CI masks it -- confirm
22 | echo "Index: https://pypi.anaconda.org/$ANACONDA_ORG/simple"
23 |
--------------------------------------------------------------------------------
/build_tools/shared.sh:
--------------------------------------------------------------------------------
1 | get_dep() {  # usage: get_dep PACKAGE VERSION; prints the requirement string to install for PACKAGE
2 | package="$1"
3 | version="$2"
4 | if [[ "$version" == "none" ]]; then
5 | # do not install with none
6 | echo  # prints an empty line
7 | elif [[ "${version%%[^0-9.]*}" ]]; then
8 | # version starts with a digit or a dot: an explicit version number is passed
9 | echo "$package==$version"
10 | elif [[ "$version" == "latest" ]]; then
11 | # use latest
12 | echo "$package"
13 | elif [[ "$version" == "min" ]]; then
14 | echo "$package==$(python sklearn/_min_dependencies.py $package)"  # pin to whatever sklearn/_min_dependencies.py prints for this package
15 | fi  # any other value prints nothing
16 | }
17 |
18 | show_installed_libraries(){
19 |     # Print the installed packages. Inside a conda environment (CONDA_PREFIX
20 |     # is set) conda list is preferred over pip list because it shows more
21 |     # information, e.g. whether OpenBLAS or MKL is installed and its version.
22 |     if [[ -z "$CONDA_PREFIX" ]]; then
23 |         python -m pip list
24 |     else
25 |         conda list
26 |     fi
27 | }
28 |
--------------------------------------------------------------------------------
/build_tools/travis/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Entry point of the "install" step defined in the ".travis.yml"
4 | # file: dispatch to the installation script that matches the
5 | # kind of job being run (wheel build or regular build).
6 |
7 | if [[ $BUILD_WHEEL != true ]]; then
8 |     source build_tools/travis/install_main.sh || travis_terminate 1
9 | else
10 |     source build_tools/travis/install_wheels.sh || travis_terminate 1
11 | fi
12 |
--------------------------------------------------------------------------------
/build_tools/travis/install_wheels.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Install cibuildwheel and build the wheels into ./wheelhouse; any failure terminates the Travis job.
3 | python -m pip install cibuildwheel || travis_terminate $?
4 | python -m cibuildwheel --output-dir wheelhouse || travis_terminate $?
5 |
--------------------------------------------------------------------------------
/build_tools/travis/script.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script is meant to be called by the "script" step defined
4 | # in the ".travis.yml" file. While this step is forbidden by the
5 | # continuous deployment jobs, we have to execute the scripts for
6 | # testing the continuous integration jobs.
7 |
8 | if [[ $BUILD_WHEEL != true ]]; then  # nothing is run here for wheel-building jobs
9 | # This trick will make Travis terminate the continuation of the pipeline
10 | bash build_tools/travis/test_script.sh || travis_terminate 1
11 | bash build_tools/travis/test_docs.sh || travis_terminate 1
12 | fi
13 |
--------------------------------------------------------------------------------
/build_tools/travis/test_docs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run the documentation tests on Travis, except on arm64 machines.
3 | set -e
4 |
5 | if [[ $TRAVIS_CPU_ARCH != arm64 ]]; then
6 | # Faster run of the documentation tests
7 | PYTEST="pytest -n $CPU_COUNT" make test-doc  # PYTEST is set only for this make invocation
8 | fi
9 |
--------------------------------------------------------------------------------
/build_tools/travis/test_script.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Print information about the environment, then run the sklearn test suite from TEST_DIR.
3 | set -e
4 |
5 | python --version
6 | python -c "import numpy; print(f'numpy {numpy.__version__}')"
7 | python -c "import scipy; print(f'scipy {scipy.__version__}')"
8 | python -c "\
9 | try:
10 | import pandas
11 | print(f'pandas {pandas.__version__}')
12 | except ImportError:
13 | pass
14 | "
15 | python -c "import joblib; print(f'{joblib.cpu_count()} CPUs')"
16 | python -c "import platform; print(f'{platform.machine()}')"
17 |
18 | TEST_CMD="pytest --showlocals --durations=20 --pyargs"
19 |
20 | # Run the tests on the installed version
21 | mkdir -p $TEST_DIR
22 |
23 | # Copy "setup.cfg" for the test settings
24 | cp setup.cfg $TEST_DIR
25 | cd $TEST_DIR
26 |
27 | if [[ $TRAVIS_CPU_ARCH == arm64 ]]; then
28 | # Faster run of the source code tests
29 | TEST_CMD="$TEST_CMD -n $CPU_COUNT"
30 |
31 | # Remove the option to test the docstring
32 | sed -i -e 's/--doctest-modules//g' setup.cfg  # edits the copy in TEST_DIR, which is now the current directory
33 | fi
34 |
35 | if [[ -n $CHECK_WARNINGS ]]; then  # any non-empty value turns these warnings into errors
36 | TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning"
37 | fi
38 |
39 | $TEST_CMD sklearn  # TEST_CMD is left unquoted so it splits into command and options
40 |
--------------------------------------------------------------------------------
/build_tools/travis/test_wheels.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Install the test dependencies and run the sklearn test suite; any failure terminates the Travis job.
3 | pip install --upgrade pip || travis_terminate $?
4 | pip install pytest pytest-xdist || travis_terminate $?
5 |
6 | # Test that there are no links to system libraries in the threadpoolctl
7 | # section of the show_versions output.
8 | python -c "import sklearn; sklearn.show_versions()" || travis_terminate $?
9 | python -m pytest -n $CPU_COUNT --pyargs sklearn || travis_terminate $?
10 |
--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
1 | # Even if empty this file is useful so that when running from the root folder
2 | # ./sklearn is added to sys.path by pytest. See
3 | # https://docs.pytest.org/en/latest/explanation/pythonpath.html for more
4 | # details. For example, this makes it possible to build extensions in place,
5 | # run pytest doc/modules/clustering.rst and use sklearn from the local folder
6 | # rather than the one from site-packages.
7 |
--------------------------------------------------------------------------------
/doc/README.md:
--------------------------------------------------------------------------------
1 | # Documentation for scikit-learn
2 |
3 | This directory contains the full manual and website as displayed at
4 | http://scikit-learn.org. See
5 | http://scikit-learn.org/dev/developers/contributing.html#documentation for
6 | detailed information about the documentation.
7 |
--------------------------------------------------------------------------------
/doc/authors_emeritus.rst:
--------------------------------------------------------------------------------
1 | - Mathieu Blondel
2 | - Matthieu Brucher
3 | - Lars Buitinck
4 | - David Cournapeau
5 | - Noel Dawe
6 | - Vincent Dubourg
7 | - Edouard Duchesnay
8 | - Alexander Fabisch
9 | - Virgile Fritsch
10 | - Satrajit Ghosh
11 | - Angel Soler Gollonet
12 | - Chris Gorgolewski
13 | - Jaques Grobler
14 | - Brian Holt
15 | - Arnaud Joly
16 | - Thouis (Ray) Jones
17 | - Kyle Kastner
18 | - manoj kumar
19 | - Robert Layton
20 | - Wei Li
21 | - Paolo Losi
22 | - Gilles Louppe
23 | - Vincent Michel
24 | - Jarrod Millman
25 | - Alexandre Passos
26 | - Fabian Pedregosa
27 | - Peter Prettenhofer
28 | - (Venkat) Raghav, Rajagopalan
29 | - Jacob Schreiber
30 | - Du Shiqiao
31 | - Jake Vanderplas
32 | - David Warde-Farley
33 | - Ron Weiss
34 |
--------------------------------------------------------------------------------
/doc/binder/requirements.txt:
--------------------------------------------------------------------------------
1 | # A binder requirement file is required by sphinx-gallery.
2 | # We don't really need one since our binder requirement file lives in the
3 | # .binder directory.
4 | # This file can be removed if 'dependencies' is made an optional key for
5 | # binder in sphinx-gallery.
6 |
--------------------------------------------------------------------------------
/doc/communication_team.rst:
--------------------------------------------------------------------------------
1 | .. raw :: html
2 |
3 |
4 |
5 |
8 |
9 |
10 |
Reshama Shaikh
11 |
12 |
13 |
14 |
Lauren Burke
15 |
16 |
17 |
--------------------------------------------------------------------------------
/doc/computing.rst:
--------------------------------------------------------------------------------
1 | .. Places parent toc into the sidebar
2 |
3 | :parenttoc: True
4 |
5 | ============================
6 | Computing with scikit-learn
7 | ============================
8 |
9 | .. include:: includes/big_toc_css.rst
10 |
11 | .. toctree::
12 | :maxdepth: 2
13 |
14 | computing/scaling_strategies
15 | computing/computational_performance
16 | computing/parallelism
17 |
--------------------------------------------------------------------------------
/doc/contents.rst:
--------------------------------------------------------------------------------
1 | .. include:: includes/big_toc_css.rst
2 | .. include:: tune_toc.rst
3 |
4 | .. Places global toc into the sidebar
5 |
6 | :globalsidebartoc: True
7 |
8 | =================
9 | Table Of Contents
10 | =================
11 |
12 | .. Define an order for the Table of Contents:
13 |
14 | .. toctree::
15 | :maxdepth: 2
16 |
17 | preface
18 | tutorial/index
19 | getting_started
20 | user_guide
21 | glossary
22 | auto_examples/index
23 | modules/classes
24 | developers/index
25 |
--------------------------------------------------------------------------------
/doc/datasets/real_world.rst:
--------------------------------------------------------------------------------
1 | .. Places parent toc into the sidebar
2 |
3 | :parenttoc: True
4 |
5 | .. _real_world_datasets:
6 |
7 | Real world datasets
8 | ===================
9 |
10 | .. currentmodule:: sklearn.datasets
11 |
12 | scikit-learn provides tools to load larger datasets, downloading them if
13 | necessary.
14 |
15 | They can be loaded using the following functions:
16 |
17 | .. autosummary::
18 |
19 | fetch_olivetti_faces
20 | fetch_20newsgroups
21 | fetch_20newsgroups_vectorized
22 | fetch_lfw_people
23 | fetch_lfw_pairs
24 | fetch_covtype
25 | fetch_rcv1
26 | fetch_kddcup99
27 | fetch_california_housing
28 |
29 | .. include:: ../../sklearn/datasets/descr/olivetti_faces.rst
30 |
31 | .. include:: ../../sklearn/datasets/descr/twenty_newsgroups.rst
32 |
33 | .. include:: ../../sklearn/datasets/descr/lfw.rst
34 |
35 | .. include:: ../../sklearn/datasets/descr/covtype.rst
36 |
37 | .. include:: ../../sklearn/datasets/descr/rcv1.rst
38 |
39 | .. include:: ../../sklearn/datasets/descr/kddcup99.rst
40 |
41 | .. include:: ../../sklearn/datasets/descr/california_housing.rst
42 |
--------------------------------------------------------------------------------
/doc/datasets/toy_dataset.rst:
--------------------------------------------------------------------------------
1 | .. Places parent toc into the sidebar
2 |
3 | :parenttoc: True
4 |
5 | .. _toy_datasets:
6 |
7 | Toy datasets
8 | ============
9 |
10 | .. currentmodule:: sklearn.datasets
11 |
12 | scikit-learn comes with a few small standard datasets that do not require
13 | downloading any file from an external website.
14 |
15 | They can be loaded using the following functions:
16 |
17 | .. autosummary::
18 |
19 | load_boston
20 | load_iris
21 | load_diabetes
22 | load_digits
23 | load_linnerud
24 | load_wine
25 | load_breast_cancer
26 |
27 | These datasets are useful to quickly illustrate the behavior of the
28 | various algorithms implemented in scikit-learn. They are however often too
29 | small to be representative of real world machine learning tasks.
30 |
31 | .. include:: ../../sklearn/datasets/descr/boston_house_prices.rst
32 |
33 | .. include:: ../../sklearn/datasets/descr/iris.rst
34 |
35 | .. include:: ../../sklearn/datasets/descr/diabetes.rst
36 |
37 | .. include:: ../../sklearn/datasets/descr/digits.rst
38 |
39 | .. include:: ../../sklearn/datasets/descr/linnerud.rst
40 |
41 | .. include:: ../../sklearn/datasets/descr/wine_data.rst
42 |
43 | .. include:: ../../sklearn/datasets/descr/breast_cancer.rst
44 |
--------------------------------------------------------------------------------
/doc/developers/index.rst:
--------------------------------------------------------------------------------
1 | .. Places parent toc into the sidebar
2 |
3 | :parenttoc: True
4 |
5 | .. _developers_guide:
6 |
7 | =================
8 | Developer's Guide
9 | =================
10 |
11 | .. include:: ../includes/big_toc_css.rst
12 | .. include:: ../tune_toc.rst
13 |
14 | .. toctree::
15 |
16 | contributing
17 | minimal_reproducer
18 | develop
19 | tips
20 | utilities
21 | performance
22 | advanced_installation
23 | bug_triaging
24 | maintainer
25 | plotting
26 |
--------------------------------------------------------------------------------
/doc/images/axa-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/axa-small.png
--------------------------------------------------------------------------------
/doc/images/axa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/axa.png
--------------------------------------------------------------------------------
/doc/images/bcg-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/bcg-small.png
--------------------------------------------------------------------------------
/doc/images/bcg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/bcg.png
--------------------------------------------------------------------------------
/doc/images/bnp-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/bnp-small.png
--------------------------------------------------------------------------------
/doc/images/bnp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/bnp.png
--------------------------------------------------------------------------------
/doc/images/cds-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/cds-logo.png
--------------------------------------------------------------------------------
/doc/images/columbia-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/columbia-small.png
--------------------------------------------------------------------------------
/doc/images/columbia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/columbia.png
--------------------------------------------------------------------------------
/doc/images/dataiku-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/dataiku-small.png
--------------------------------------------------------------------------------
/doc/images/dataiku.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/dataiku.png
--------------------------------------------------------------------------------
/doc/images/digicosme.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/digicosme.png
--------------------------------------------------------------------------------
/doc/images/dysco.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/dysco.png
--------------------------------------------------------------------------------
/doc/images/fnrs-logo-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/fnrs-logo-small.png
--------------------------------------------------------------------------------
/doc/images/fujitsu-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/fujitsu-small.png
--------------------------------------------------------------------------------
/doc/images/fujitsu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/fujitsu.png
--------------------------------------------------------------------------------
/doc/images/google-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/google-small.png
--------------------------------------------------------------------------------
/doc/images/grid_search_cross_validation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/grid_search_cross_validation.png
--------------------------------------------------------------------------------
/doc/images/grid_search_workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/grid_search_workflow.png
--------------------------------------------------------------------------------
/doc/images/huggingface_logo-noborder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/huggingface_logo-noborder.png
--------------------------------------------------------------------------------
/doc/images/inria-logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/inria-logo.jpg
--------------------------------------------------------------------------------
/doc/images/inria-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/inria-small.png
--------------------------------------------------------------------------------
/doc/images/intel-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/intel-small.png
--------------------------------------------------------------------------------
/doc/images/intel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/intel.png
--------------------------------------------------------------------------------
/doc/images/iris.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/iris.pdf
--------------------------------------------------------------------------------
/doc/images/last_digit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/last_digit.png
--------------------------------------------------------------------------------
/doc/images/lda_model_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/lda_model_graph.png
--------------------------------------------------------------------------------
/doc/images/logo_APHP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/logo_APHP.png
--------------------------------------------------------------------------------
/doc/images/logo_APHP_text.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/logo_APHP_text.png
--------------------------------------------------------------------------------
/doc/images/microsoft-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/microsoft-small.png
--------------------------------------------------------------------------------
/doc/images/microsoft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/microsoft.png
--------------------------------------------------------------------------------
/doc/images/ml_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/ml_map.png
--------------------------------------------------------------------------------
/doc/images/multi_org_chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/multi_org_chart.png
--------------------------------------------------------------------------------
/doc/images/multilayerperceptron_network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/multilayerperceptron_network.png
--------------------------------------------------------------------------------
/doc/images/no_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/no_image.png
--------------------------------------------------------------------------------
/doc/images/nvidia-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/nvidia-small.png
--------------------------------------------------------------------------------
/doc/images/nvidia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/nvidia.png
--------------------------------------------------------------------------------
/doc/images/nyu_short_color.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/nyu_short_color.png
--------------------------------------------------------------------------------
/doc/images/plot_digits_classification.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/plot_digits_classification.png
--------------------------------------------------------------------------------
/doc/images/plot_face_recognition_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/plot_face_recognition_1.png
--------------------------------------------------------------------------------
/doc/images/plot_face_recognition_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/plot_face_recognition_2.png
--------------------------------------------------------------------------------
/doc/images/png-logo-inria-la-fondation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/png-logo-inria-la-fondation.png
--------------------------------------------------------------------------------
/doc/images/quansight-labs-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/quansight-labs-small.png
--------------------------------------------------------------------------------
/doc/images/quansight-labs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/quansight-labs.png
--------------------------------------------------------------------------------
/doc/images/rbm_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/rbm_graph.png
--------------------------------------------------------------------------------
/doc/images/scikit-learn-logo-notext.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/scikit-learn-logo-notext.png
--------------------------------------------------------------------------------
/doc/images/scikit-learn-logo-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/scikit-learn-logo-small.png
--------------------------------------------------------------------------------
/doc/images/sloan_banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/sloan_banner.png
--------------------------------------------------------------------------------
/doc/images/sloan_logo-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/sloan_logo-small.png
--------------------------------------------------------------------------------
/doc/images/sydney-primary.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/sydney-primary.jpeg
--------------------------------------------------------------------------------
/doc/images/sydney-stacked-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/sydney-stacked-small.png
--------------------------------------------------------------------------------
/doc/images/telecom-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/telecom-small.png
--------------------------------------------------------------------------------
/doc/images/telecom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/telecom.png
--------------------------------------------------------------------------------
/doc/images/visual-studio-build-tools-selection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/visual-studio-build-tools-selection.png
--------------------------------------------------------------------------------
/doc/includes/big_toc_css.rst:
--------------------------------------------------------------------------------
1 | ..
2 | File to ..include in a document with a big table of contents, to give
3 | it 'style'
4 |
5 | .. raw:: html
6 |
7 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/doc/inspection.rst:
--------------------------------------------------------------------------------
1 | .. Places parent toc into the sidebar
2 |
3 | :parenttoc: True
4 |
5 | .. include:: includes/big_toc_css.rst
6 |
7 | .. _inspection:
8 |
9 | Inspection
10 | ----------
11 |
12 | Predictive performance is often the main goal of developing machine learning
13 | models. Yet summarising performance with an evaluation metric is often
14 | insufficient: it assumes that the evaluation metric and test dataset
15 | perfectly reflect the target domain, which is rarely true. In certain domains,
16 | a model needs a certain level of interpretability before it can be deployed.
17 | A model that is exhibiting performance issues needs to be debugged for one to
18 | understand the model's underlying issue. The
19 | :mod:`sklearn.inspection` module provides tools to help understand the
20 | predictions from a model and what affects them. This can be used to
21 | evaluate assumptions and biases of a model, design a better model, or
22 | to diagnose issues with model performance.
23 |
24 | .. topic:: Examples:
25 |
26 | * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py`
27 |
28 | .. toctree::
29 |
30 | modules/partial_dependence
31 | modules/permutation_importance
32 |
--------------------------------------------------------------------------------
/doc/logos/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/favicon.ico
--------------------------------------------------------------------------------
/doc/logos/identity.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/identity.pdf
--------------------------------------------------------------------------------
/doc/logos/scikit-learn-logo-notext.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/scikit-learn-logo-notext.png
--------------------------------------------------------------------------------
/doc/logos/scikit-learn-logo-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/scikit-learn-logo-small.png
--------------------------------------------------------------------------------
/doc/logos/scikit-learn-logo-thumb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/scikit-learn-logo-thumb.png
--------------------------------------------------------------------------------
/doc/logos/scikit-learn-logo.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/scikit-learn-logo.bmp
--------------------------------------------------------------------------------
/doc/logos/scikit-learn-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/scikit-learn-logo.png
--------------------------------------------------------------------------------
/doc/model_selection.rst:
--------------------------------------------------------------------------------
1 | .. Places parent toc into the sidebar
2 |
3 | :parenttoc: True
4 |
5 | .. include:: includes/big_toc_css.rst
6 |
7 | .. _model_selection:
8 |
9 | Model selection and evaluation
10 | ------------------------------
11 |
12 | .. toctree::
13 | :maxdepth: 2
14 |
15 | modules/cross_validation
16 | modules/grid_search
17 | modules/model_evaluation
18 | modules/learning_curve
19 |
--------------------------------------------------------------------------------
/doc/modules/glm_data/lasso_enet_coordinate_descent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/modules/glm_data/lasso_enet_coordinate_descent.png
--------------------------------------------------------------------------------
/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png
--------------------------------------------------------------------------------
/doc/modules/pipeline.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | .. raw:: html
4 |
5 |
6 |
9 |
10 | This content is now at :ref:`combining_estimators`.
11 |
--------------------------------------------------------------------------------
/doc/preface.rst:
--------------------------------------------------------------------------------
1 | .. This helps define the TOC ordering for "about us" sections. Particularly
2 | useful for PDF output as this section is not linked from elsewhere.
3 |
4 | .. Places global toc into the sidebar
5 |
6 | :globalsidebartoc: True
7 |
8 | .. _preface_menu:
9 |
10 | .. include:: includes/big_toc_css.rst
11 | .. include:: tune_toc.rst
12 |
13 | =======================
14 | Welcome to scikit-learn
15 | =======================
16 |
17 | |
18 |
19 | .. toctree::
20 | :maxdepth: 2
21 |
22 | install
23 | faq
24 | support
25 | related_projects
26 | about
27 | testimonials/testimonials
28 | whats_new
29 | roadmap
30 | governance
31 |
32 | |
33 |
--------------------------------------------------------------------------------
/doc/sphinxext/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include tests *.py
2 | include *.txt
3 |
--------------------------------------------------------------------------------
/doc/supervised_learning.rst:
--------------------------------------------------------------------------------
1 | .. Places parent toc into the sidebar
2 |
3 | :parenttoc: True
4 |
5 | .. include:: includes/big_toc_css.rst
6 |
7 | .. _supervised-learning:
8 |
9 | Supervised learning
10 | -------------------
11 |
12 | .. toctree::
13 | :maxdepth: 2
14 |
15 | modules/linear_model
16 | modules/lda_qda.rst
17 | modules/kernel_ridge.rst
18 | modules/svm
19 | modules/sgd
20 | modules/neighbors
21 | modules/gaussian_process
22 | modules/cross_decomposition.rst
23 | modules/naive_bayes
24 | modules/tree
25 | modules/ensemble
26 | modules/multiclass
27 | modules/feature_selection.rst
28 | modules/semi_supervised.rst
29 | modules/isotonic.rst
30 | modules/calibration.rst
31 | modules/neural_networks_supervised
32 |
--------------------------------------------------------------------------------
/doc/templates/class.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}==============
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 |
8 | .. include:: {{module}}.{{objname}}.examples
9 |
10 | .. raw:: html
11 |
12 |
13 |
--------------------------------------------------------------------------------
/doc/templates/class_with_call.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}===============
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 |
8 | {% block methods %}
9 | .. automethod:: __call__
10 | {% endblock %}
11 |
12 | .. include:: {{module}}.{{objname}}.examples
13 |
14 | .. raw:: html
15 |
16 |
17 |
--------------------------------------------------------------------------------
/doc/templates/deprecated_class.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}==============
3 |
4 | .. meta::
5 | :robots: noindex
6 |
7 | .. warning::
8 | **DEPRECATED**
9 |
10 |
11 | .. currentmodule:: {{ module }}
12 |
13 | .. autoclass:: {{ objname }}
14 |
15 | {% block methods %}
16 | .. automethod:: __init__
17 | {% endblock %}
18 |
19 | .. include:: {{module}}.{{objname}}.examples
20 |
21 | .. raw:: html
22 |
23 |
24 |
--------------------------------------------------------------------------------
/doc/templates/deprecated_class_with_call.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}===============
3 |
4 | .. meta::
5 | :robots: noindex
6 |
7 | .. warning::
8 | **DEPRECATED**
9 |
10 |
11 | .. currentmodule:: {{ module }}
12 |
13 | .. autoclass:: {{ objname }}
14 |
15 | {% block methods %}
16 | .. automethod:: __init__
17 | .. automethod:: __call__
18 | {% endblock %}
19 |
20 | .. include:: {{module}}.{{objname}}.examples
21 |
22 | .. raw:: html
23 |
24 |
25 |
--------------------------------------------------------------------------------
/doc/templates/deprecated_class_without_init.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}==============
3 |
4 | .. meta::
5 | :robots: noindex
6 |
7 | .. warning::
8 | **DEPRECATED**
9 |
10 |
11 | .. currentmodule:: {{ module }}
12 |
13 | .. autoclass:: {{ objname }}
14 |
15 | .. include:: {{module}}.{{objname}}.examples
16 |
17 | .. raw:: html
18 |
19 |
20 |
--------------------------------------------------------------------------------
/doc/templates/deprecated_function.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}====================
3 |
4 | .. meta::
5 | :robots: noindex
6 |
7 | .. warning::
8 | **DEPRECATED**
9 |
10 |
11 | .. currentmodule:: {{ module }}
12 |
13 | .. autofunction:: {{ objname }}
14 |
15 | .. include:: {{module}}.{{objname}}.examples
16 |
17 | .. raw:: html
18 |
19 |
20 |
--------------------------------------------------------------------------------
/doc/templates/function.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}====================
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autofunction:: {{ objname }}
7 |
8 | .. include:: {{module}}.{{objname}}.examples
9 |
10 | .. raw:: html
11 |
12 |
13 |
--------------------------------------------------------------------------------
/doc/templates/generate_deprecated.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | for f in [^d]*; do (head -n2 < $f; echo '
3 | .. meta::
4 | :robots: noindex
5 |
6 | .. warning::
7 | **DEPRECATED**
8 | '; tail -n+3 $f) > deprecated_$f; done
9 |
--------------------------------------------------------------------------------
/doc/templates/numpydoc_docstring.rst:
--------------------------------------------------------------------------------
1 | {{index}}
2 | {{summary}}
3 | {{extended_summary}}
4 | {{parameters}}
5 | {{returns}}
6 | {{yields}}
7 | {{other_parameters}}
8 | {{attributes}}
9 | {{raises}}
10 | {{warns}}
11 | {{warnings}}
12 | {{see_also}}
13 | {{notes}}
14 | {{references}}
15 | {{examples}}
16 | {{methods}}
17 |
--------------------------------------------------------------------------------
/doc/templates/redirects.html:
--------------------------------------------------------------------------------
1 | {% set redirect = pathto(redirects[pagename]) %}
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | scikit-learn: machine learning in Python
11 |
12 |
13 | You will be automatically redirected to the new location of this page.
14 |
15 |
16 |
--------------------------------------------------------------------------------
/doc/testimonials/README.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | To find the list of people we contacted, see:
4 | https://docs.google.com/spreadsheet/ccc?key=0AhGnAxuBDhjmdDYwNzlZVE5SMkFsMjNBbGlaWkpNZ1E&usp=sharing
5 |
6 | To obtain access to this file, send an email to:
7 | nelle dot varoquaux at gmail dot com
8 |
9 |
--------------------------------------------------------------------------------
/doc/testimonials/images/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/Makefile
--------------------------------------------------------------------------------
/doc/testimonials/images/aweber.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/aweber.png
--------------------------------------------------------------------------------
/doc/testimonials/images/bestofmedia-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/bestofmedia-logo.png
--------------------------------------------------------------------------------
/doc/testimonials/images/betaworks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/betaworks.png
--------------------------------------------------------------------------------
/doc/testimonials/images/birchbox.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/birchbox.jpg
--------------------------------------------------------------------------------
/doc/testimonials/images/bnp_paribas_cardif.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/bnp_paribas_cardif.png
--------------------------------------------------------------------------------
/doc/testimonials/images/booking.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/booking.png
--------------------------------------------------------------------------------
/doc/testimonials/images/change-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/change-logo.png
--------------------------------------------------------------------------------
/doc/testimonials/images/dataiku_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/dataiku_logo.png
--------------------------------------------------------------------------------
/doc/testimonials/images/datapublica.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/datapublica.png
--------------------------------------------------------------------------------
/doc/testimonials/images/datarobot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/datarobot.png
--------------------------------------------------------------------------------
/doc/testimonials/images/evernote.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/evernote.png
--------------------------------------------------------------------------------
/doc/testimonials/images/howaboutwe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/howaboutwe.png
--------------------------------------------------------------------------------
/doc/testimonials/images/huggingface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/huggingface.png
--------------------------------------------------------------------------------
/doc/testimonials/images/infonea.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/infonea.jpg
--------------------------------------------------------------------------------
/doc/testimonials/images/inria.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/inria.png
--------------------------------------------------------------------------------
/doc/testimonials/images/jpmorgan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/jpmorgan.png
--------------------------------------------------------------------------------
/doc/testimonials/images/lovely.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/lovely.png
--------------------------------------------------------------------------------
/doc/testimonials/images/machinalis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/machinalis.png
--------------------------------------------------------------------------------
/doc/testimonials/images/mars.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/mars.png
--------------------------------------------------------------------------------
/doc/testimonials/images/okcupid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/okcupid.png
--------------------------------------------------------------------------------
/doc/testimonials/images/ottogroup_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/ottogroup_logo.png
--------------------------------------------------------------------------------
/doc/testimonials/images/peerindex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/peerindex.png
--------------------------------------------------------------------------------
/doc/testimonials/images/phimeca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/phimeca.png
--------------------------------------------------------------------------------
/doc/testimonials/images/rangespan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/rangespan.png
--------------------------------------------------------------------------------
/doc/testimonials/images/solido_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/solido_logo.png
--------------------------------------------------------------------------------
/doc/testimonials/images/spotify.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/spotify.png
--------------------------------------------------------------------------------
/doc/testimonials/images/telecomparistech.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/telecomparistech.jpg
--------------------------------------------------------------------------------
/doc/testimonials/images/yhat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/yhat.png
--------------------------------------------------------------------------------
/doc/testimonials/images/zopa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/zopa.png
--------------------------------------------------------------------------------
/doc/themes/scikit-learn-modern/search.html:
--------------------------------------------------------------------------------
1 | {%- extends "basic/search.html" %}
2 | {% block extrahead %}
3 |
4 |
5 |
6 |
7 |
8 | {% endblock %}
9 |
--------------------------------------------------------------------------------
/doc/themes/scikit-learn-modern/theme.conf:
--------------------------------------------------------------------------------
1 | [theme]
2 | inherit = basic
3 | pygments_style = default
4 | stylesheet = css/theme.css
5 |
6 | [options]
7 | google_analytics = true
8 | mathjax_path =
9 |
--------------------------------------------------------------------------------
/doc/tutorial/common_includes/info.txt:
--------------------------------------------------------------------------------
1 | Meant to share common RST file snippets that we want to reuse by inclusion
2 | in the real tutorial in order to lower the maintenance burden
3 | of redundant sections.
4 |
--------------------------------------------------------------------------------
/doc/tutorial/index.rst:
--------------------------------------------------------------------------------
1 | .. Places global toc into the sidebar
2 |
3 | :globalsidebartoc: True
4 |
5 | .. _tutorial_menu:
6 |
7 |
8 | .. include:: ../includes/big_toc_css.rst
9 | .. include:: ../tune_toc.rst
10 |
11 | ======================
12 | scikit-learn Tutorials
13 | ======================
14 |
15 | |
16 |
17 | .. toctree::
18 | :maxdepth: 2
19 |
20 | basic/tutorial.rst
21 | statistical_inference/index.rst
22 | text_analytics/working_with_text_data.rst
23 | machine_learning_map/index
24 | ../presentations
25 |
26 | |
27 |
28 | .. note:: **Doctest Mode**
29 |
30 | The code-examples in the above tutorials are written in a
31 | *python-console* format. If you wish to easily execute these examples
32 | in **IPython**, use::
33 |
34 | %doctest_mode
35 |
36 | in the IPython-console. You can then simply copy and paste the examples
37 | directly into IPython without having to worry about removing the **>>>**
38 | manually.
39 |
--------------------------------------------------------------------------------
/doc/tutorial/text_analytics/.gitignore:
--------------------------------------------------------------------------------
1 | # cruft
2 | .*.swp
3 | *.pyc
4 | .DS_Store
5 | *.pdf
6 |
7 | # folder to be used for working on the exercises
8 | workspace
9 |
10 | # output of the sphinx build of the documentation
11 | tutorial/_build
12 |
13 | # datasets to be fetched from the web and cached locally
14 | data/twenty_newsgroups/20news-bydate.tar.gz
15 | data/twenty_newsgroups/20news-bydate-train
16 | data/twenty_newsgroups/20news-bydate-test
17 |
18 | data/movie_reviews/txt_sentoken
19 | data/movie_reviews/poldata.README.2.0
20 |
21 | data/languages/paragraphs
22 | data/languages/short_paragraphs
23 | data/languages/html
24 |
25 | data/labeled_faces_wild/lfw_preprocessed/
26 |
--------------------------------------------------------------------------------
/doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py:
--------------------------------------------------------------------------------
1 | """Script to download the movie review dataset"""
2 |
import os
import shutil
import tarfile
from contextlib import closing
from urllib.request import urlopen


URL = ("http://www.cs.cornell.edu/people/pabo/"
       "movie-review-data/review_polarity.tar.gz")

ARCHIVE_NAME = URL.rsplit('/', 1)[1]
DATA_FOLDER = "txt_sentoken"


# Nothing to do when the extracted dataset folder is already present.
if not os.path.exists(DATA_FOLDER):

    if not os.path.exists(ARCHIVE_NAME):
        print("Downloading dataset from %s (3 MB)" % URL)
        # Download to a temporary name first so that an interrupted transfer
        # never leaves a truncated file under ARCHIVE_NAME (a later run would
        # otherwise skip the download and fail while decompressing).
        partial_name = ARCHIVE_NAME + ".part"
        # Close the HTTP response deterministically and stream it to disk
        # instead of buffering the whole archive in memory.
        with closing(urlopen(URL)) as response:
            with open(partial_name, 'wb') as archive:
                shutil.copyfileobj(response, archive)
        os.replace(partial_name, ARCHIVE_NAME)

    print("Decompressing %s" % ARCHIVE_NAME)
    # NOTE(review): extractall() trusts the member paths of an archive that
    # was fetched over plain HTTP; consider validating members or using a
    # tarfile extraction filter where the running Python supports it.
    with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive:
        archive.extractall(path='.')
    # The archive is only needed for extraction; remove it afterwards.
    os.remove(ARCHIVE_NAME)
28 |
--------------------------------------------------------------------------------
/doc/tutorial/text_analytics/data/twenty_newsgroups/fetch_data.py:
--------------------------------------------------------------------------------
1 | """Script to download the 20 newsgroups text classification set"""
2 |
import os
import shutil
import tarfile
from contextlib import closing
from urllib.request import urlopen

URL = ("http://people.csail.mit.edu/jrennie/"
       "20Newsgroups/20news-bydate.tar.gz")

ARCHIVE_NAME = URL.rsplit('/', 1)[1]
TRAIN_FOLDER = "20news-bydate-train"
TEST_FOLDER = "20news-bydate-test"


# Fetch and unpack only when either extracted folder is missing.
if not os.path.exists(TRAIN_FOLDER) or not os.path.exists(TEST_FOLDER):

    if not os.path.exists(ARCHIVE_NAME):
        print("Downloading dataset from %s (14 MB)" % URL)
        # Download to a temporary name first so that an interrupted transfer
        # never leaves a truncated file under ARCHIVE_NAME (a later run would
        # otherwise skip the download and fail while decompressing).
        partial_name = ARCHIVE_NAME + ".part"
        # Close the HTTP response deterministically and stream it to disk
        # instead of buffering the whole archive in memory.
        with closing(urlopen(URL)) as response:
            with open(partial_name, 'wb') as archive:
                shutil.copyfileobj(response, archive)
        os.replace(partial_name, ARCHIVE_NAME)

    print("Decompressing %s" % ARCHIVE_NAME)
    # NOTE(review): extractall() trusts the member paths of an archive that
    # was fetched over plain HTTP; consider validating members or using a
    # tarfile extraction filter where the running Python supports it.
    with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive:
        archive.extractall(path='.')
    # The archive is only needed for extraction; remove it afterwards.
    os.remove(ARCHIVE_NAME)
28 |
--------------------------------------------------------------------------------
/doc/tutorial/text_analytics/solutions/generate_skeletons.py:
--------------------------------------------------------------------------------
1 | """Generate skeletons from the example code"""
import os

# Directory holding the solution scripts (the directory of this file).
exercise_dir = os.path.dirname(__file__)
if exercise_dir == '':
    exercise_dir = '.'

# Skeletons are written to a sibling "skeletons" directory.
skeleton_dir = os.path.abspath(os.path.join(exercise_dir, '..', 'skeletons'))
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(skeleton_dir, exist_ok=True)

solutions = os.listdir(exercise_dir)

for f in solutions:
    if not f.endswith('.py'):
        continue

    # Do not generate a skeleton of this generator script itself.
    if f == os.path.basename(__file__):
        continue

    print("Generating skeleton for %s" % f)

    # Context managers close both files even if reading or writing fails;
    # previously the input file was never closed at all.
    with open(os.path.join(exercise_dir, f)) as input_file:
        with open(os.path.join(skeleton_dir, f), 'w') as output_file:
            in_exercise_region = False

            for line in input_file:
                linestrip = line.strip()
                if not linestrip:
                    # A blank line ends the current exercise region.
                    in_exercise_region = False
                elif linestrip.startswith('# TASK:'):
                    # A "# TASK:" comment opens an exercise region.
                    in_exercise_region = True

                # Inside an exercise region only comment lines are kept, so
                # the solution code is dropped and the instructions remain.
                if not in_exercise_region or linestrip.startswith('#'):
                    output_file.write(line)
39 |
--------------------------------------------------------------------------------
/doc/unsupervised_learning.rst:
--------------------------------------------------------------------------------
1 | .. Places parent toc into the sidebar
2 |
3 | :parenttoc: True
4 |
5 | .. include:: includes/big_toc_css.rst
6 |
7 | .. _unsupervised-learning:
8 |
9 | Unsupervised learning
10 | -----------------------
11 |
12 | .. toctree::
13 | :maxdepth: 2
14 |
15 | modules/mixture
16 | modules/manifold
17 | modules/clustering
18 | modules/biclustering
19 | modules/decomposition
20 | modules/covariance
21 | modules/outlier_detection
22 | modules/density
23 | modules/neural_networks_unsupervised
24 |
--------------------------------------------------------------------------------
/doc/user_guide.rst:
--------------------------------------------------------------------------------
1 | .. Places parent toc into the sidebar
2 |
3 | :parenttoc: True
4 |
5 | .. title:: User guide: contents
6 |
7 | .. _user_guide:
8 |
9 | ==========
10 | User Guide
11 | ==========
12 |
13 | .. include:: includes/big_toc_css.rst
14 |
15 | .. nice layout in the toc
16 |
17 | .. include:: tune_toc.rst
18 |
19 | .. toctree::
20 | :numbered:
21 | :maxdepth: 3
22 |
23 | supervised_learning.rst
24 | unsupervised_learning.rst
25 | model_selection.rst
26 | inspection.rst
27 | visualizations.rst
28 | data_transforms.rst
29 | datasets.rst
30 | computing.rst
31 | model_persistence.rst
32 | common_pitfalls.rst
33 |
--------------------------------------------------------------------------------
/doc/whats_new.rst:
--------------------------------------------------------------------------------
1 | .. currentmodule:: sklearn
2 | .. include:: whats_new/_contributors.rst
3 |
4 | Release History
5 | ===============
6 |
7 | Release notes for all scikit-learn releases are linked in this page.
8 |
9 | **Tip:** `Subscribe to scikit-learn releases <https://libraries.io/pypi/scikit-learn>`__
10 | on libraries.io to be notified when new versions are released.
11 |
12 | .. toctree::
13 | :maxdepth: 1
14 |
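15 | Version 1.1 <whats_new/v1.1.rst>
16 | Version 1.0 <whats_new/v1.0.rst>
17 | Version 0.24 <whats_new/v0.24.rst>
18 | Version 0.23 <whats_new/v0.23.rst>
19 | Version 0.22 <whats_new/v0.22.rst>
20 | Version 0.21 <whats_new/v0.21.rst>
21 | Version 0.20 <whats_new/v0.20.rst>
22 | Version 0.19 <whats_new/v0.19.rst>
23 | Version 0.18 <whats_new/v0.18.rst>
24 | Version 0.17 <whats_new/v0.17.rst>
25 | Version 0.16 <whats_new/v0.16.rst>
26 | Version 0.15 <whats_new/v0.15.rst>
27 | Version 0.14 <whats_new/v0.14.rst>
28 | Version 0.13 <whats_new/v0.13.rst>
29 | Older Versions <whats_new/older_versions.rst>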
30 |
--------------------------------------------------------------------------------
/doc/whats_new/changelog_legend.inc:
--------------------------------------------------------------------------------
1 | Legend for changelogs
2 | ---------------------
3 |
4 | - |MajorFeature|: something big that you couldn't do before.
5 | - |Feature|: something that you couldn't do before.
6 | - |Efficiency|: an existing feature now may not require as much computation or
7 | memory.
8 | - |Enhancement|: a miscellaneous minor improvement.
9 | - |Fix|: something that previously didn't work as documented -- or according
10 | to reasonable expectations -- should now work.
11 | - |API|: you will need to change your code to have the same effect in the
12 | future; or a feature will be removed in the future.
13 |
--------------------------------------------------------------------------------
/examples/README.txt:
--------------------------------------------------------------------------------
1 | .. _general_examples:
2 |
3 | Examples
4 | ========
5 |
--------------------------------------------------------------------------------
/examples/applications/README.txt:
--------------------------------------------------------------------------------
1 | .. _realworld_examples:
2 |
3 | Examples based on real world datasets
4 | -------------------------------------
5 |
6 | Applications to real world problems with some medium sized datasets or
7 | interactive user interface.
8 |
--------------------------------------------------------------------------------
/examples/bicluster/README.txt:
--------------------------------------------------------------------------------
1 | .. _bicluster_examples:
2 |
3 | Biclustering
4 | ------------
5 |
6 | Examples concerning the :mod:`sklearn.cluster.bicluster` module.
7 |
--------------------------------------------------------------------------------
/examples/calibration/README.txt:
--------------------------------------------------------------------------------
1 | .. _calibration_examples:
2 |
3 | Calibration
4 | -----------------------
5 |
6 | Examples illustrating the calibration of predicted probabilities of classifiers.
7 |
--------------------------------------------------------------------------------
/examples/classification/README.txt:
--------------------------------------------------------------------------------
1 | .. _classification_examples:
2 |
3 | Classification
4 | -----------------------
5 |
6 | General examples about classification algorithms.
7 |
--------------------------------------------------------------------------------
/examples/cluster/README.txt:
--------------------------------------------------------------------------------
1 | .. _cluster_examples:
2 |
3 | Clustering
4 | ----------
5 |
6 | Examples concerning the :mod:`sklearn.cluster` module.
7 |
--------------------------------------------------------------------------------
/examples/compose/README.txt:
--------------------------------------------------------------------------------
1 | .. _compose_examples:
2 |
3 | Pipelines and composite estimators
4 | ----------------------------------
5 |
6 | Examples of how to compose transformers and pipelines from other estimators. See the :ref:`User Guide <combining_estimators>`.
7 |
--------------------------------------------------------------------------------
/examples/covariance/README.txt:
--------------------------------------------------------------------------------
1 | .. _covariance_examples:
2 |
3 | Covariance estimation
4 | ---------------------
5 |
6 | Examples concerning the :mod:`sklearn.covariance` module.
7 |
--------------------------------------------------------------------------------
/examples/cross_decomposition/README.txt:
--------------------------------------------------------------------------------
1 | .. _cross_decomposition_examples:
2 |
3 | Cross decomposition
4 | -------------------
5 |
6 | Examples concerning the :mod:`sklearn.cross_decomposition` module.
7 |
8 |
--------------------------------------------------------------------------------
/examples/datasets/README.txt:
--------------------------------------------------------------------------------
1 | .. _dataset_examples:
2 |
3 | Dataset examples
4 | -----------------------
5 |
6 | Examples concerning the :mod:`sklearn.datasets` module.
7 |
--------------------------------------------------------------------------------
/examples/datasets/plot_digits_last_image.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | =========================================================
4 | The Digit Dataset
5 | =========================================================
6 |
7 | This dataset is made up of 1797 8x8 images. Each image,
8 | like the one shown below, is of a hand-written digit.
9 | In order to utilize an 8x8 figure like this, we'd have to
10 | first transform it into a feature vector with length 64.
11 |
12 | See `here
13 | <https://archive.ics.uci.edu/ml/datasets/Pen-Based+Recognition+of+Handwritten+Digits>`_
14 | for more information about this dataset.
15 |
16 | """
17 |
18 | # Code source: Gaël Varoquaux
19 | # Modified for documentation by Jaques Grobler
20 | # License: BSD 3 clause
21 |
import matplotlib.pyplot as plt

from sklearn import datasets

# Fetch the digits dataset that ships with scikit-learn.
digits = datasets.load_digits()

# Show the final image of the dataset in a small figure using an inverted
# grayscale colormap and no pixel interpolation.
last_image = digits.images[-1]
plt.figure(1, figsize=(3, 3))
plt.imshow(last_image, cmap=plt.cm.gray_r, interpolation="nearest")
plt.show()
33 |
--------------------------------------------------------------------------------
/examples/decomposition/README.txt:
--------------------------------------------------------------------------------
1 | .. _decomposition_examples:
2 |
3 | Decomposition
4 | -------------
5 |
6 | Examples concerning the :mod:`sklearn.decomposition` module.
7 |
8 |
--------------------------------------------------------------------------------
/examples/decomposition/plot_beta_divergence.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============================
3 | Beta-divergence loss functions
4 | ==============================
5 |
6 | A plot that compares the various Beta-divergence loss functions supported by
7 | the Multiplicative-Update ('mu') solver in :class:`~sklearn.decomposition.NMF`.
8 |
9 | """
10 |
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition._nmf import _beta_divergence

x = np.linspace(0.001, 4, 1000)
y = np.zeros(x.shape)

colors = "mbgyr"
betas = (0.0, 0.5, 1.0, 1.5, 2.0)

# One curve per beta value, each drawn in its own color.
for color, beta in zip(colors, betas):
    # Evaluate the divergence between 1 and every sample point of x.
    for idx in range(x.shape[0]):
        y[idx] = _beta_divergence(1, x[idx], 1, beta)
    plt.plot(x, y, label="beta = %1.1f" % beta, color=color)

plt.xlabel("x")
plt.title("beta-divergence(1, x)")
plt.legend(loc=0)
plt.axis([0, 4, 0, 3])
plt.show()
30 |
--------------------------------------------------------------------------------
/examples/ensemble/README.txt:
--------------------------------------------------------------------------------
1 | .. _ensemble_examples:
2 |
3 | Ensemble methods
4 | ----------------
5 |
6 | Examples concerning the :mod:`sklearn.ensemble` module.
7 |
--------------------------------------------------------------------------------
/examples/exercises/README.txt:
--------------------------------------------------------------------------------
1 | Tutorial exercises
2 | ------------------
3 |
4 | Exercises for the tutorials
5 |
--------------------------------------------------------------------------------
/examples/exercises/plot_digits_classification_exercise.py:
--------------------------------------------------------------------------------
1 | """
2 | ================================
3 | Digits Classification Exercise
4 | ================================
5 |
6 | A tutorial exercise regarding the use of classification techniques on
7 | the Digits dataset.
8 |
9 | This exercise is used in the :ref:`clf_tut` part of the
10 | :ref:`supervised_learning_tut` section of the
11 | :ref:`stat_learn_tut_index`.
12 |
13 | """
14 |
from sklearn import datasets, neighbors, linear_model

X_digits, y_digits = datasets.load_digits(return_X_y=True)
# Rescale the features by the largest value found in the data.
X_digits = X_digits / X_digits.max()

n_samples = len(X_digits)
# The first 90% of the samples are used for training, the rest for testing.
split = int(0.9 * n_samples)

X_train, X_test = X_digits[:split], X_digits[split:]
y_train, y_test = y_digits[:split], y_digits[split:]

knn = neighbors.KNeighborsClassifier()
logistic = linear_model.LogisticRegression(max_iter=1000)

knn_score = knn.fit(X_train, y_train).score(X_test, y_test)
print("KNN score: %f" % knn_score)

logistic_score = logistic.fit(X_train, y_train).score(X_test, y_test)
print("LogisticRegression score: %f" % logistic_score)
35 |
--------------------------------------------------------------------------------
/examples/feature_selection/README.txt:
--------------------------------------------------------------------------------
1 | .. _feature_selection_examples:
2 |
3 | Feature Selection
4 | -----------------------
5 |
6 | Examples concerning the :mod:`sklearn.feature_selection` module.
7 |
--------------------------------------------------------------------------------
/examples/feature_selection/plot_rfe_digits.py:
--------------------------------------------------------------------------------
1 | """
2 | =============================
3 | Recursive feature elimination
4 | =============================
5 |
6 | A recursive feature elimination example showing the relevance of pixels in
7 | a digit classification task.
8 |
9 | .. note::
10 |
11 | See also :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`
12 |
13 | """ # noqa: E501
14 |
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.feature_selection import RFE
from sklearn.svm import SVC

# Flatten every digit image into a single feature row.
digits = load_digits()
n_images = len(digits.images)
X = digits.images.reshape((n_images, -1))
y = digits.target

# Eliminate one feature per step until a single one is left, then fold the
# resulting ranking back into the shape of one image.
svc = SVC(kernel="linear", C=1)
rfe = RFE(estimator=svc, n_features_to_select=1, step=1)
rfe.fit(X, y)
image_shape = digits.images[0].shape
ranking = rfe.ranking_.reshape(image_shape)

# Draw the per-pixel ranking as a matrix plot.
plt.matshow(ranking, cmap=plt.cm.Blues)
plt.colorbar()
plt.title("Ranking of pixels with RFE")
plt.show()
36 |
--------------------------------------------------------------------------------
/examples/gaussian_process/README.txt:
--------------------------------------------------------------------------------
1 | .. _gaussian_process_examples:
2 |
3 | Gaussian Process for Machine Learning
4 | -------------------------------------
5 |
6 | Examples concerning the :mod:`sklearn.gaussian_process` module.
7 |
8 |
--------------------------------------------------------------------------------
/examples/impute/README.txt:
--------------------------------------------------------------------------------
1 | .. _impute_examples:
2 |
3 | Missing Value Imputation
4 | ------------------------
5 |
6 | Examples concerning the :mod:`sklearn.impute` module.
7 |
--------------------------------------------------------------------------------
/examples/inspection/README.txt:
--------------------------------------------------------------------------------
1 | .. _inspection_examples:
2 |
3 | Inspection
4 | ----------
5 |
6 | Examples related to the :mod:`sklearn.inspection` module.
7 |
8 |
--------------------------------------------------------------------------------
/examples/kernel_approximation/README.txt:
--------------------------------------------------------------------------------
1 | .. _kernel_approximation_examples:
2 |
3 | Kernel Approximation
4 | --------------------
5 |
6 | Examples concerning the :mod:`sklearn.kernel_approximation` module.
7 |
--------------------------------------------------------------------------------
/examples/linear_model/README.txt:
--------------------------------------------------------------------------------
1 | .. _linear_examples:
2 |
3 | Generalized Linear Models
4 | -------------------------
5 |
6 | Examples concerning the :mod:`sklearn.linear_model` module.
7 |
--------------------------------------------------------------------------------
/examples/linear_model/plot_lasso_lars.py:
--------------------------------------------------------------------------------
1 | """
2 | =====================
3 | Lasso path using LARS
4 | =====================
5 |
6 | Computes Lasso Path along the regularization parameter using the LARS
7 | algorithm on the diabetes dataset. Each color represents a different
8 | feature of the coefficient vector, and this is displayed as a function
9 | of the regularization parameter.
10 |
11 | """
12 |
13 | # Author: Fabian Pedregosa
14 | # Alexandre Gramfort
15 | # License: BSD 3 clause
16 |
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn import linear_model

X, y = datasets.load_diabetes(return_X_y=True)

print("Computing regularization path using the LARS ...")
_, _, coefs = linear_model.lars_path(X, y, method="lasso", verbose=True)

# Sum of absolute coefficients at each step of the path, scaled so that the
# last step equals 1.
path = coefs.T
l1_norms = np.abs(path).sum(axis=1)
xx = l1_norms / l1_norms[-1]

plt.plot(xx, path)
ymin, ymax = plt.ylim()
# Dashed vertical line at every step of the path.
plt.vlines(xx, ymin, ymax, linestyle="dashed")
plt.xlabel("|coef| / max|coef|")
plt.ylabel("Coefficients")
plt.title("LASSO Path")
plt.axis("tight")
plt.show()
39 |
--------------------------------------------------------------------------------
/examples/manifold/README.txt:
--------------------------------------------------------------------------------
1 | .. _manifold_examples:
2 |
3 | Manifold learning
4 | -----------------------
5 |
6 | Examples concerning the :mod:`sklearn.manifold` module.
7 |
8 |
--------------------------------------------------------------------------------
/examples/miscellaneous/README.txt:
--------------------------------------------------------------------------------
1 | .. _miscellaneous_examples:
2 |
3 | Miscellaneous
4 | -------------
5 |
6 | Miscellaneous and introductory examples for scikit-learn.
7 |
8 |
--------------------------------------------------------------------------------
/examples/miscellaneous/plot_changed_only_pprint_parameter.py:
--------------------------------------------------------------------------------
1 | """
2 | =================================
3 | Compact estimator representations
4 | =================================
5 |
6 | This example illustrates the use of the print_changed_only global parameter.
7 |
8 | Setting print_changed_only to True will alter the representation of
9 | estimators to only show the parameters that have been set to non-default
10 | values. This can be used to have more compact representations.
11 |
12 | """
13 |
from sklearn.linear_model import LogisticRegression
from sklearn import set_config


lr = LogisticRegression(penalty="l1")

# Request the verbose form explicitly: if print_changed_only is already True
# when this script starts, both prints below would otherwise show the same
# compact output and the example would demonstrate nothing.
# NOTE(review): print_changed_only is believed to default to True in current
# scikit-learn releases -- confirm against the set_config documentation, and
# decide whether the "Default representation:" label should be reworded.
set_config(print_changed_only=False)
print("Default representation:")
print(lr)
# Expected output (exact parameter list depends on the installed version):
# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
#                    intercept_scaling=1, l1_ratio=None, max_iter=100,
#                    multi_class='auto', n_jobs=None, penalty='l1',
#                    random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
#                    warm_start=False)

set_config(print_changed_only=True)
print("\nWith changed_only option:")
print(lr)
# LogisticRegression(penalty='l1')
31 |
--------------------------------------------------------------------------------
/examples/mixture/README.txt:
--------------------------------------------------------------------------------
1 | .. _mixture_examples:
2 |
3 | Gaussian Mixture Models
4 | -----------------------
5 |
6 | Examples concerning the :mod:`sklearn.mixture` module.
7 |
--------------------------------------------------------------------------------
/examples/model_selection/README.txt:
--------------------------------------------------------------------------------
1 | .. _model_selection_examples:
2 |
3 | Model Selection
4 | -----------------------
5 |
6 | Examples related to the :mod:`sklearn.model_selection` module.
7 |
--------------------------------------------------------------------------------
/examples/model_selection/plot_cv_predict.py:
--------------------------------------------------------------------------------
1 | """
2 | ====================================
3 | Plotting Cross-Validated Predictions
4 | ====================================
5 |
6 | This example shows how to use
7 | :func:`~sklearn.model_selection.cross_val_predict` to visualize prediction
8 | errors.
9 |
10 | """
11 |
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import linear_model
from sklearn.model_selection import cross_val_predict

lr = linear_model.LinearRegression()
X, y = datasets.load_diabetes(return_X_y=True)

# Each entry of `predicted` is a cross-validated prediction for the matching
# entry of `y` (10 folds).
predicted = cross_val_predict(lr, X, y, cv=10)

fig, ax = plt.subplots()
ax.scatter(y, predicted, edgecolors=(0, 0, 0))
# Dashed diagonal spanning the range of the measured values.
y_range = [y.min(), y.max()]
ax.plot(y_range, y_range, "k--", lw=4)
ax.set_xlabel("Measured")
ax.set_ylabel("Predicted")
plt.show()
30 |
--------------------------------------------------------------------------------
/examples/multioutput/README.txt:
--------------------------------------------------------------------------------
1 | .. _multioutput_examples:
2 |
3 | Multioutput methods
4 | -------------------
5 |
6 | Examples concerning the :mod:`sklearn.multioutput` module.
7 |
--------------------------------------------------------------------------------
/examples/neighbors/README.txt:
--------------------------------------------------------------------------------
1 | .. _neighbors_examples:
2 |
3 | Nearest Neighbors
4 | -----------------------
5 |
6 | Examples concerning the :mod:`sklearn.neighbors` module.
7 |
--------------------------------------------------------------------------------
/examples/neural_networks/README.txt:
--------------------------------------------------------------------------------
1 | .. _neural_network_examples:
2 |
3 | Neural Networks
4 | -----------------------
5 |
6 | Examples concerning the :mod:`sklearn.neural_network` module.
7 |
--------------------------------------------------------------------------------
/examples/preprocessing/README.txt:
--------------------------------------------------------------------------------
1 | .. _preprocessing_examples:
2 |
3 | Preprocessing
4 | -------------
5 |
6 | Examples concerning the :mod:`sklearn.preprocessing` module.
7 |
--------------------------------------------------------------------------------
/examples/release_highlights/README.txt:
--------------------------------------------------------------------------------
1 | .. _release_highlights_examples:
2 |
3 | Release Highlights
4 | ------------------
5 |
6 | These examples illustrate the main features of the releases of scikit-learn.
7 |
--------------------------------------------------------------------------------
/examples/semi_supervised/README.txt:
--------------------------------------------------------------------------------
1 | .. _semi_supervised_examples:
2 |
3 | Semi Supervised Classification
4 | ------------------------------
5 |
6 | Examples concerning the :mod:`sklearn.semi_supervised` module.
7 |
--------------------------------------------------------------------------------
/examples/svm/README.txt:
--------------------------------------------------------------------------------
1 | .. _svm_examples:
2 |
3 | Support Vector Machines
4 | -----------------------
5 |
6 | Examples concerning the :mod:`sklearn.svm` module.
7 |
--------------------------------------------------------------------------------
/examples/svm/plot_svm_nonlinear.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============
3 | Non-linear SVM
4 | ==============
5 |
6 | Perform binary classification using non-linear SVC
7 | with RBF kernel. The target to predict is a XOR of the
8 | inputs.
9 |
10 | The color map illustrates the decision function learned by the SVC.
11 |
12 | """
13 |
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

# Square evaluation grid covering [-3, 3] on both axes.
axis_values = np.linspace(-3, 3, 500)
xx, yy = np.meshgrid(axis_values, axis_values)

# Seeded random points labelled by the XOR of the signs of their coordinates.
np.random.seed(0)
X = np.random.randn(300, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# fit the model
clf = svm.NuSVC(gamma="auto")
clf.fit(X, Y)

# evaluate the decision function at every grid point
grid_points = np.c_[xx.ravel(), yy.ravel()]
Z = clf.decision_function(grid_points).reshape(xx.shape)

bounds = (xx.min(), xx.max(), yy.min(), yy.max())
plt.imshow(
    Z,
    interpolation="nearest",
    extent=bounds,
    aspect="auto",
    origin="lower",
    cmap=plt.cm.PuOr_r,
)
# Dashed contour where the decision function equals zero.
contours = plt.contour(xx, yy, Z, levels=[0], linewidths=2, linestyles="dashed")
plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired, edgecolors="k")
plt.xticks(())
plt.yticks(())
plt.axis([-3, 3, -3, 3])
plt.show()
45 |
--------------------------------------------------------------------------------
/examples/text/README.txt:
--------------------------------------------------------------------------------
1 | .. _text_examples:
2 |
3 | Working with text documents
4 | ----------------------------
5 |
6 | Examples concerning the :mod:`sklearn.feature_extraction.text` module.
7 |
--------------------------------------------------------------------------------
/examples/tree/README.txt:
--------------------------------------------------------------------------------
1 | .. _tree_examples:
2 |
3 | Decision Trees
4 | --------------
5 |
6 | Examples concerning the :mod:`sklearn.tree` module.
7 |
--------------------------------------------------------------------------------
/lgtm.yml:
--------------------------------------------------------------------------------
1 | extraction:
2 | cpp:
3 | before_index:
4 | - pip3 install numpy==1.16.3
5 | - pip3 install --no-deps scipy Cython
6 | index:
7 | build_command:
8 | - python3 setup.py build_ext -i
9 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | # Minimum requirements for the build system to execute.
3 | requires = [
4 | "setuptools<60.0",
5 | "wheel",
6 | "Cython>=0.28.5",
7 |
8 | # use oldest-supported-numpy which provides the oldest numpy version with
9 | # wheels on PyPI
10 | #
11 | # see: https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg
12 | "oldest-supported-numpy",
13 |
14 | "scipy>=1.3.2",
15 | ]
16 |
17 | [tool.black]
18 | line-length = 88
19 | target_version = ['py38', 'py39', 'py310']
20 | preview = true
21 | exclude = '''
22 | /(
23 | \.eggs # exclude a few common directories in the
24 | | \.git # root of the project
25 | | \.mypy_cache
26 | | \.vscode
27 | | build
28 | | dist
29 | | doc/tutorial
30 | | doc/_build
31 | | doc/auto_examples
32 | | sklearn/externals
33 | | asv_benchmarks/env
34 | )/
35 | '''
36 |
--------------------------------------------------------------------------------
/sklearn/__check_build/_check_build.pyx:
--------------------------------------------------------------------------------
def check_build():
    """Do nothing and return ``None``.

    NOTE(review): presumably this exists only so that importing the compiled
    extension and calling it proves the build worked -- confirm with callers.
    """
    return None
3 |
--------------------------------------------------------------------------------
/sklearn/__check_build/setup.py:
--------------------------------------------------------------------------------
1 | # Author: Virgile Fritsch
2 | # License: BSD 3 clause
3 |
4 | import numpy
5 |
6 |
def configuration(parent_package="", top_path=None):
    """Create the numpy.distutils configuration of ``__check_build``.

    Parameters
    ----------
    parent_package : str, default=""
        Passed through unchanged to ``Configuration``.

    top_path : str or None, default=None
        Passed through unchanged to ``Configuration``.

    Returns
    -------
    config : Configuration
        Configuration declaring the ``_check_build`` extension, built from
        ``_check_build.pyx`` with the NumPy headers on the include path.
    """
    # Imported inside the function so that merely importing this module does
    # not pull in numpy.distutils.
    from numpy.distutils import misc_util

    cfg = misc_util.Configuration("__check_build", parent_package, top_path)
    cfg.add_extension(
        "_check_build",
        sources=["_check_build.pyx"],
        include_dirs=[numpy.get_include()],
    )
    return cfg
16 |
17 |
if __name__ == "__main__":
    # Script entry point: build this sub-package on its own.
    from numpy.distutils import core

    setup_kwargs = configuration(top_path="").todict()
    core.setup(**setup_kwargs)
22 |
--------------------------------------------------------------------------------
/sklearn/_distributor_init.py:
--------------------------------------------------------------------------------
1 | """ Distributor init file
2 |
3 | Distributors: you can add custom code here to support particular distributions
4 | of scikit-learn.
5 |
6 | For example, this is a good place to put any checks for hardware requirements.
7 |
8 | The scikit-learn standard source distribution will not put code in this file,
9 | so you can safely replace this file with your own version.
10 | """
11 |
--------------------------------------------------------------------------------
/sklearn/_loss/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn._loss` module includes loss function classes suitable for
3 | fitting classification and regression tasks.
4 | """
5 |
6 | from .loss import (
7 | HalfSquaredError,
8 | AbsoluteError,
9 | PinballLoss,
10 | HalfPoissonLoss,
11 | HalfGammaLoss,
12 | HalfTweedieLoss,
13 | HalfBinomialLoss,
14 | HalfMultinomialLoss,
15 | )
16 |
17 |
18 | __all__ = [
19 | "HalfSquaredError",
20 | "AbsoluteError",
21 | "PinballLoss",
22 | "HalfPoissonLoss",
23 | "HalfGammaLoss",
24 | "HalfTweedieLoss",
25 | "HalfBinomialLoss",
26 | "HalfMultinomialLoss",
27 | ]
28 |
--------------------------------------------------------------------------------
/sklearn/_loss/setup.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | from numpy.distutils.misc_util import Configuration
3 | from sklearn._build_utils import gen_from_templates
4 |
5 |
def configuration(parent_package="", top_path=None):
    """Create the numpy.distutils configuration of the ``_loss`` package.

    Parameters
    ----------
    parent_package : str, default=""
        Passed through unchanged to ``Configuration``.

    top_path : str or None, default=None
        Passed through unchanged to ``Configuration``.

    Returns
    -------
    config : Configuration
        Configuration declaring the ``_loss`` extension.
    """
    cfg = Configuration("_loss", parent_package, top_path)

    # _loss.pyx is generated from its template before the extension that
    # compiles it is declared.
    template_paths = ["sklearn/_loss/_loss.pyx.tp"]
    gen_from_templates(template_paths)

    extension_options = {
        "sources": ["_loss.pyx"],
        "include_dirs": [numpy.get_include()],
        # define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
    }
    cfg.add_extension("_loss", **extension_options)
    return cfg
20 |
21 |
if __name__ == "__main__":
    # Script entry point: build this sub-package on its own.
    from numpy.distutils import core

    setup_kwargs = configuration().todict()
    core.setup(**setup_kwargs)
26 |
--------------------------------------------------------------------------------
/sklearn/_loss/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/_loss/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/cluster/_k_means_common.pxd:
--------------------------------------------------------------------------------
1 | from cython cimport floating
2 | cimport numpy as np
3 |
4 |
5 | cdef floating _euclidean_dense_dense(floating*, floating*, int, bint) nogil
6 |
7 | cdef floating _euclidean_sparse_dense(floating[::1], int[::1], floating[::1],
8 | floating, bint) nogil
9 |
10 | cpdef void _relocate_empty_clusters_dense(
11 | floating[:, ::1], floating[::1], floating[:, ::1],
12 | floating[:, ::1], floating[::1], int[::1])
13 |
14 | cpdef void _relocate_empty_clusters_sparse(
15 | floating[::1], int[::1], int[::1], floating[::1], floating[:, ::1],
16 | floating[:, ::1], floating[::1], int[::1])
17 |
18 | cdef void _average_centers(floating[:, ::1], floating[::1])
19 |
20 | cdef void _center_shift(floating[:, ::1], floating[:, ::1], floating[::1])
21 |
--------------------------------------------------------------------------------
/sklearn/cluster/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/cluster/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/cluster/tests/common.py:
--------------------------------------------------------------------------------
1 | """
2 | Common utilities for testing clustering.
3 |
4 | """
5 |
6 | import numpy as np
7 |
8 |
9 | ###############################################################################
10 | # Generate sample data
11 |
12 |
def generate_clustered_data(
    seed=0, n_clusters=3, n_features=2, n_samples_per_cluster=20, std=0.4
):
    """Generate Gaussian blobs around a fixed table of cluster centers.

    Parameters
    ----------
    seed : int, default=0
        Seed of the ``numpy.random.RandomState`` used to draw the noise.

    n_clusters : int, default=3
        Number of blobs. Centers are read row by row from a hard-coded table
        of 4 rows.

    n_features : int, default=2
        Number of columns of the output. Only the first ``n_features``
        coordinates of each 4-dimensional center are used.

    n_samples_per_cluster : int, default=20
        Number of points drawn for each blob.

    std : float, default=0.4
        Scale applied to the standard normal noise added to each center.

    Returns
    -------
    X : ndarray of shape (n_clusters * n_samples_per_cluster, n_features)
        The samples, grouped cluster after cluster.
    """
    rng = np.random.RandomState(seed)

    # The centers are deliberately shifted away from zero (+10) so that
    # clustering algorithms are exercised on non-centered data.
    centers = 10 + np.array(
        [
            [1, 1, 1, 0],
            [-1, -1, 0, 1],
            [1, -1, 1, 1],
            [-1, 1, 1, 0],
        ]
    )

    # Start from an empty (0, n_features) block so the result keeps the right
    # shape even when no cluster is requested.
    blobs = [np.empty((0, n_features))]
    for cluster_idx in range(n_clusters):
        center = centers[cluster_idx][:n_features]
        noise = prng_noise = rng.randn(n_samples_per_cluster, n_features)
        blobs.append(center + std * noise)
    return np.concatenate(blobs, axis=0)
39 |
--------------------------------------------------------------------------------
/sklearn/compose/__init__.py:
--------------------------------------------------------------------------------
1 | """Meta-estimators for building composite models with transformers
2 |
3 | In addition to its current contents, this module will eventually be home to
4 | refurbished versions of Pipeline and FeatureUnion.
5 |
6 | """
7 |
8 | from ._column_transformer import (
9 | ColumnTransformer,
10 | make_column_transformer,
11 | make_column_selector,
12 | )
13 | from ._target import TransformedTargetRegressor
14 |
15 |
16 | __all__ = [
17 | "ColumnTransformer",
18 | "make_column_transformer",
19 | "TransformedTargetRegressor",
20 | "make_column_selector",
21 | ]
22 |
--------------------------------------------------------------------------------
/sklearn/compose/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/compose/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/covariance/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/covariance/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/cross_decomposition/__init__.py:
--------------------------------------------------------------------------------
1 | from ._pls import PLSCanonical, PLSRegression, PLSSVD, CCA
2 |
3 | __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"]
4 |
--------------------------------------------------------------------------------
/sklearn/cross_decomposition/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/cross_decomposition/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/data/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/data/diabetes_data_raw.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/data/diabetes_data_raw.csv.gz
--------------------------------------------------------------------------------
/sklearn/datasets/data/diabetes_target.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/data/diabetes_target.csv.gz
--------------------------------------------------------------------------------
/sklearn/datasets/data/digits.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/data/digits.csv.gz
--------------------------------------------------------------------------------
/sklearn/datasets/data/linnerud_exercise.csv:
--------------------------------------------------------------------------------
1 | Chins Situps Jumps
2 | 5 162 60
3 | 2 110 60
4 | 12 101 101
5 | 12 105 37
6 | 13 155 58
7 | 4 101 42
8 | 8 101 38
9 | 6 125 40
10 | 15 200 40
11 | 17 251 250
12 | 17 120 38
13 | 13 210 115
14 | 14 215 105
15 | 1 50 50
16 | 6 70 31
17 | 12 210 120
18 | 4 60 25
19 | 11 230 80
20 | 15 225 73
21 | 2 110 43
22 |
--------------------------------------------------------------------------------
/sklearn/datasets/data/linnerud_physiological.csv:
--------------------------------------------------------------------------------
1 | Weight Waist Pulse
2 | 191 36 50
3 | 189 37 52
4 | 193 38 58
5 | 162 35 62
6 | 189 35 46
7 | 182 36 56
8 | 211 38 56
9 | 167 34 60
10 | 176 31 74
11 | 154 33 56
12 | 169 34 50
13 | 166 33 52
14 | 154 34 64
15 | 247 46 50
16 | 193 36 46
17 | 202 37 62
18 | 176 37 54
19 | 157 32 52
20 | 156 33 54
21 | 138 33 68
22 |
--------------------------------------------------------------------------------
/sklearn/datasets/descr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/descr/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/descr/linnerud.rst:
--------------------------------------------------------------------------------
1 | .. _linnerrud_dataset:
2 |
3 | Linnerud dataset
4 | ----------------
5 |
6 | **Data Set Characteristics:**
7 |
8 | :Number of Instances: 20
9 | :Number of Attributes: 3
10 | :Missing Attribute Values: None
11 |
12 | The Linnerud dataset is a multi-output regression dataset. It consists of three
13 | exercise (data) and three physiological (target) variables collected from
14 | twenty middle-aged men in a fitness club:
15 |
16 | - *physiological* - CSV containing 20 observations on 3 physiological variables:
17 | Weight, Waist and Pulse.
18 | - *exercise* - CSV containing 20 observations on 3 exercise variables:
19 | Chins, Situps and Jumps.
20 |
21 | .. topic:: References
22 |
23 | * Tenenhaus, M. (1998). La régression PLS: théorie et pratique. Paris:
24 | Éditions Technip.
25 |
--------------------------------------------------------------------------------
/sklearn/datasets/images/README.txt:
--------------------------------------------------------------------------------
1 | Image: china.jpg
2 | Released under a creative commons license. [1]
3 | Attribution: Some rights reserved by danielbuechele [2]
4 | Retrieved 21st August, 2011 from [3] by Robert Layton
5 |
6 | [1] https://creativecommons.org/licenses/by/2.0/
7 | [2] https://www.flickr.com/photos/danielbuechele/
8 | [3] https://www.flickr.com/photos/danielbuechele/6061409035/sizes/z/in/photostream/
9 |
10 |
11 | Image: flower.jpg
12 | Released under a creative commons license. [1]
13 | Attribution: Some rights reserved by danielbuechele [2]
14 | Retrieved 21st August, 2011 from [3] by Robert Layton
15 |
16 | [1] https://creativecommons.org/licenses/by/2.0/
17 | [2] https://www.flickr.com/photos/vultilion/
18 | [3] https://www.flickr.com/photos/vultilion/6056698931/sizes/z/in/photostream/
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/sklearn/datasets/images/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/images/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/images/china.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/images/china.jpg
--------------------------------------------------------------------------------
/sklearn/datasets/images/flower.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/images/flower.jpg
--------------------------------------------------------------------------------
/sklearn/datasets/setup.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import os
3 | import platform
4 |
5 |
def configuration(parent_package="", top_path=None):
    """Create the numpy.distutils configuration of the ``datasets`` package.

    Parameters
    ----------
    parent_package : str, default=""
        Passed through unchanged to ``Configuration``.

    top_path : str or None, default=None
        Passed through unchanged to ``Configuration``.

    Returns
    -------
    config : Configuration
        Configuration registering the data directories, the
        ``_svmlight_format_fast`` extension (skipped on PyPy) and the
        ``tests`` subpackage.
    """
    from numpy.distutils import misc_util

    cfg = misc_util.Configuration("datasets", parent_package, top_path)

    # Directories shipped as package data, registered in this order.
    data_dirs = ("data", "descr", "images", os.path.join("tests", "data"))
    for data_dir in data_dirs:
        cfg.add_data_dir(data_dir)

    # The svmlight reader extension is not declared when running on PyPy.
    running_on_pypy = platform.python_implementation() == "PyPy"
    if not running_on_pypy:
        cfg.add_extension(
            "_svmlight_format_fast",
            sources=["_svmlight_format_fast.pyx"],
            include_dirs=[numpy.get_include()],
        )

    cfg.add_subpackage("tests")
    return cfg
22 |
23 |
if __name__ == "__main__":
    # Script entry point: build this sub-package on its own.
    from numpy.distutils import core

    setup_kwargs = configuration(top_path="").todict()
    core.setup(**setup_kwargs)
28 |
--------------------------------------------------------------------------------
/sklearn/datasets/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/conftest.py:
--------------------------------------------------------------------------------
1 | """ Network tests are only run if data is already locally available,
2 | or if download is specifically requested by an environment variable."""
3 | import builtins
4 | import pytest
5 |
6 |
@pytest.fixture
def hide_available_pandas(monkeypatch):
    """Make ``import pandas`` raise ``ImportError`` while the fixture is active."""
    real_import = builtins.__import__

    def import_without_pandas(name, *args, **kwargs):
        # Only the exact module name "pandas" is intercepted; every other
        # import is delegated to the genuine ``__import__``.
        if name != "pandas":
            return real_import(name, *args, **kwargs)
        raise ImportError()

    # Patched through monkeypatch rather than by assigning to builtins directly.
    monkeypatch.setattr(builtins, "__import__", import_without_pandas)
18 |
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1/api-v1-jd-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1/api-v1-jd-1.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1/api-v1-jdf-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1/api-v1-jdf-1.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1/api-v1-jdq-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1/api-v1-jdq-1.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1/data-v1-dl-1.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1/data-v1-dl-1.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/api-v1-jd-1119.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/api-v1-jd-1119.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdf-1119.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdf-1119.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdq-1119.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdq-1119.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_1119/data-v1-dl-54002.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/data-v1-dl-54002.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/api-v1-jd-2.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/api-v1-jd-2.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/api-v1-jdf-2.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/api-v1-jdf-2.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-dv-1.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-s-act-.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/api-v1-jdq-2.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/api-v1-jdq-2.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_2/data-v1-dl-1666876.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/data-v1-dl-1666876.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-292.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-292.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-40981.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-40981.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-292.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-292.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-40981.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-40981.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-s-act-.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_292/data-v1-dl-49822.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/data-v1-dl-49822.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_3/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_3/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_3/api-v1-jd-3.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_3/api-v1-jd-3.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_3/api-v1-jdf-3.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_3/api-v1-jdf-3.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_3/api-v1-jdq-3.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_3/api-v1-jdq-3.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_3/data-v1-dl-3.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_3/data-v1-dl-3.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/api-v1-jd-40589.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/api-v1-jd-40589.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdf-40589.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdf-40589.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-dv-3.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-dv-3.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-s-act-.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdq-40589.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdq-40589.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40589/data-v1-dl-4644182.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/data-v1-dl-4644182.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jd-40675.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jd-40675.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdf-40675.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdf-40675.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-s-act-.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdq-40675.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdq-40675.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40675/data-v1-dl-4965250.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/data-v1-dl-4965250.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40945/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40945/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40945/api-v1-jd-40945.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40945/api-v1-jd-40945.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdf-40945.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdf-40945.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdq-40945.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdq-40945.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40945/data-v1-dl-16826755.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40945/data-v1-dl-16826755.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/api-v1-jd-40966.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/api-v1-jd-40966.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdf-40966.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdf-40966.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdq-40966.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdq-40966.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_40966/data-v1-dl-17928620.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/data-v1-dl-17928620.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_42585/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_42585/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_42585/api-v1-jd-42585.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_42585/api-v1-jd-42585.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdf-42585.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdf-42585.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdq-42585.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdq-42585.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_42585/data-v1-dl-21854866.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_42585/data-v1-dl-21854866.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/api-v1-jd-561.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/api-v1-jd-561.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/api-v1-jdf-561.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/api-v1-jdf-561.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-dv-1.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-s-act-.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/api-v1-jdq-561.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/api-v1-jdq-561.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_561/data-v1-dl-52739.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/data-v1-dl-52739.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/api-v1-jd-61.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/api-v1-jd-61.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/api-v1-jdf-61.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/api-v1-jdf-61.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-dv-1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-dv-1.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-s-act-.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-s-act-.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/api-v1-jdq-61.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/api-v1-jdq-61.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_61/data-v1-dl-61.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/data-v1-dl-61.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_62/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_62/__init__.py
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_62/api-v1-jd-62.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_62/api-v1-jd-62.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_62/api-v1-jdf-62.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_62/api-v1-jdf-62.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_62/api-v1-jdq-62.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_62/api-v1-jdq-62.json.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/id_62/data-v1-dl-52352.arff.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_62/data-v1-dl-52352.arff.gz
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/svmlight_classification.txt:
--------------------------------------------------------------------------------
1 | # comment
2 | # note: the next line contains a tab
3 | 1.0 3:2.5 11:-5.2 16:1.5 # and an inline comment
4 | 2.0 6:1.0 13:-3
5 | # another comment
6 | 3.0 21:27
7 | 4.0 2:1.234567890123456e10 # double precision value
8 | 1.0 # empty line, all zeros
9 | 2.0 3:0 # explicit zeros
10 |
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/svmlight_invalid.txt:
--------------------------------------------------------------------------------
1 | python 2:2.5 10:-5.2 15:1.5
2 | 2.0 5:1.0 12:-3
3 | 3.0 20:27
4 |
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/svmlight_invalid_order.txt:
--------------------------------------------------------------------------------
1 | -1 5:2.5 2:-5.2 15:1.5
2 |
--------------------------------------------------------------------------------
/sklearn/datasets/tests/data/svmlight_multilabel.txt:
--------------------------------------------------------------------------------
1 | # multilabel dataset in SVMlight format
2 | 1,0 2:2.5 10:-5.2 15:1.5
3 | 2 5:1.0 12:-3
4 | 2:3.5 11:26
5 | 1,2 20:27
6 |
--------------------------------------------------------------------------------
/sklearn/datasets/tests/test_olivetti_faces.py:
--------------------------------------------------------------------------------
1 | """Test Olivetti faces fetcher, if the data is available,
2 | or if specifically requested via environment variable
3 | (e.g. for travis cron job)."""
4 |
5 | import numpy as np
6 |
7 | from sklearn.utils import Bunch
8 | from sklearn.datasets.tests.test_common import check_return_X_y
9 |
10 | from sklearn.utils._testing import assert_array_equal
11 |
12 |
def test_olivetti_faces(fetch_olivetti_faces_fxt):
    """Check the structure of the object returned by the Olivetti fetcher.

    The fetcher is supplied by the ``fetch_olivetti_faces_fxt`` fixture. The
    test verifies the available keys, the array shapes, the set of target
    labels, the start of ``DESCR`` and finally the ``return_X_y`` option.
    """
    faces = fetch_olivetti_faces_fxt(shuffle=True, random_state=0)
    assert isinstance(faces, Bunch)

    available_keys = faces.keys()
    for key in ("data", "images", "target", "DESCR"):
        assert key in available_keys

    expected_shapes = {
        "data": (400, 4096),
        "images": (400, 64, 64),
        "target": (400,),
    }
    for attribute, shape in expected_shapes.items():
        assert getattr(faces, attribute).shape == shape

    # The distinct labels must be exactly 0..39.
    sorted_targets = np.sort(faces.target)
    assert_array_equal(np.unique(sorted_targets), np.arange(40))
    assert faces.DESCR.startswith(".. _olivetti_faces_dataset:")

    # Exercise the return_X_y option as well.
    check_return_X_y(faces, fetch_olivetti_faces_fxt)
28 |
--------------------------------------------------------------------------------
/sklearn/decomposition/_cdnmf_fast.pyx:
--------------------------------------------------------------------------------
1 | # Author: Mathieu Blondel, Tom Dupre la Tour
2 | # License: BSD 3 clause
3 |
4 | from cython cimport floating
5 | from libc.math cimport fabs
6 |
7 |
def _update_cdnmf_fast(floating[:, ::1] W, floating[:, :] HHt,
                       floating[:, :] XHt, Py_ssize_t[::1] permutation):
    """Run one in-place coordinate-wise update sweep over ``W``.

    Columns of ``W`` are visited in the order given by ``permutation``. For
    each visited column ``t`` and each row ``i``, ``W[i, t]`` is decreased by
    ``grad / HHt[t, t]`` and clipped at zero; the entry is left untouched
    when ``HHt[t, t]`` is zero.

    Returns the sum, over all visited entries, of the absolute value of the
    projected gradient evaluated just before each update.
    """
    cdef:
        floating violation = 0
        Py_ssize_t n_components = W.shape[1]
        Py_ssize_t n_samples = W.shape[0]  # n_features for H update
        floating grad, pg, hess
        Py_ssize_t i, r, s, t

    with nogil:
        for s in range(n_components):
            t = permutation[s]

            for i in range(n_samples):
                # gradient = GW[t, i] where GW = np.dot(W, HHt) - XHt
                grad = -XHt[i, t]

                # Uses the current W, including entries already updated
                # earlier in this sweep.
                for r in range(n_components):
                    grad += HHt[t, r] * W[i, r]

                # projected gradient: at the W[i, t] == 0 bound only a
                # negative gradient counts as a violation
                pg = min(0., grad) if W[i, t] == 0 else grad
                violation += fabs(pg)

                # Hessian
                hess = HHt[t, t]

                # Skip the update when the step would divide by zero.
                if hess != 0:
                    W[i, t] = max(W[i, t] - grad / hess, 0.)

    return violation
39 |
--------------------------------------------------------------------------------
/sklearn/decomposition/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy
3 | from numpy.distutils.misc_util import Configuration
4 |
5 |
def configuration(parent_package="", top_path=None):
    """Build the numpy.distutils configuration of the decomposition package.

    Registers the ``_online_lda_fast`` and ``_cdnmf_fast`` extensions and the
    ``tests`` subpackage, then returns the ``Configuration`` object.
    """
    config = Configuration("decomposition", parent_package, top_path)

    # The "m" library is only requested on POSIX systems.
    libraries = ["m"] if os.name == "posix" else []

    for extension in ("_online_lda_fast", "_cdnmf_fast"):
        config.add_extension(
            extension,
            sources=[extension + ".pyx"],
            include_dirs=[numpy.get_include()],
            libraries=libraries,
        )

    config.add_subpackage("tests")
    return config
30 |
31 |
# When executed as a script, pass this package's configuration (as a dict)
# to numpy.distutils' setup().
if __name__ == "__main__":
    from numpy.distutils.core import setup

    setup(**configuration().todict())
36 |
--------------------------------------------------------------------------------
/sklearn/decomposition/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/decomposition/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/ensemble/_hist_gradient_boosting/__init__.py:
--------------------------------------------------------------------------------
1 | """This module implements histogram-based gradient boosting estimators.
2 |
3 | The implementation is a port from pygbm which is itself strongly inspired
4 | by LightGBM.
5 | """
6 |
--------------------------------------------------------------------------------
/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd:
--------------------------------------------------------------------------------
1 | from .common cimport X_BINNED_DTYPE_C
2 | from .common cimport BITSET_DTYPE_C
3 | from .common cimport BITSET_INNER_DTYPE_C
4 | from .common cimport X_DTYPE_C
5 |
# Signatures of the bitset helpers; every one is declared nogil. Only the
# declarations are visible here, so refer to the implementation for the exact
# semantics of each helper.
cdef void init_bitset(BITSET_DTYPE_C bitset) nogil

cdef void set_bitset(BITSET_DTYPE_C bitset, X_BINNED_DTYPE_C val) nogil

cdef unsigned char in_bitset(BITSET_DTYPE_C bitset, X_BINNED_DTYPE_C val) nogil

# cpdef: callable from Python as well; takes a 1-D memoryview.
cpdef unsigned char in_bitset_memoryview(const BITSET_INNER_DTYPE_C[:] bitset,
                                         X_BINNED_DTYPE_C val) nogil

# Variant taking a 2-D memoryview plus a `row` argument.
cdef unsigned char in_bitset_2d_memoryview(
    const BITSET_INNER_DTYPE_C [:, :] bitset,
    X_BINNED_DTYPE_C val,
    unsigned int row) nogil
19 |
--------------------------------------------------------------------------------
/sklearn/ensemble/_hist_gradient_boosting/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/ensemble/_hist_gradient_boosting/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/ensemble/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/ensemble/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/experimental/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn.experimental` module provides importable modules that enable
3 | the use of experimental features or estimators.
4 |
5 | The features and estimators that are experimental aren't subject to
6 | deprecation cycles. Use them at your own risk!
7 | """
8 |
--------------------------------------------------------------------------------
/sklearn/experimental/enable_hist_gradient_boosting.py:
--------------------------------------------------------------------------------
1 | """This is now a no-op and can be safely removed from your code.
2 |
3 | It used to enable the use of
4 | :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
5 | :class:`~sklearn.ensemble.HistGradientBoostingRegressor` when they were still
6 | :term:`experimental`, but these estimators are now stable and can be imported
7 | normally from `sklearn.ensemble`.
8 | """
9 | # Don't remove this file, we don't want to break users code just because the
10 | # feature isn't experimental anymore.
11 |
12 |
13 | import warnings
14 |
15 |
# Importing this module has no effect any more; tell the user so. The
# category is spelled out explicitly (UserWarning is also the default).
warnings.warn(
    (
        "Since version 1.0, it is not needed to import "
        "enable_hist_gradient_boosting anymore. "
        "HistGradientBoostingClassifier and HistGradientBoostingRegressor "
        "are now stable and can be normally imported from sklearn.ensemble."
    ),
    category=UserWarning,
)
22 |
--------------------------------------------------------------------------------
/sklearn/experimental/enable_iterative_imputer.py:
--------------------------------------------------------------------------------
1 | """Enables IterativeImputer
2 |
3 | The API and results of this estimator might change without any deprecation
4 | cycle.
5 |
6 | Importing this file dynamically sets :class:`~sklearn.impute.IterativeImputer`
7 | as an attribute of the impute module::
8 |
9 | >>> # explicitly require this experimental feature
10 | >>> from sklearn.experimental import enable_iterative_imputer # noqa
11 | >>> # now you can import normally from impute
12 | >>> from sklearn.impute import IterativeImputer
13 | """
14 |
15 | from ..impute._iterative import IterativeImputer
16 | from .. import impute
17 |
# Use setattr to avoid mypy errors when monkeypatching.
setattr(impute, "IterativeImputer", IterativeImputer)
# Also list the estimator in the impute module's __all__.
impute.__all__ += ["IterativeImputer"]
21 |
--------------------------------------------------------------------------------
/sklearn/experimental/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/experimental/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/experimental/tests/test_enable_hist_gradient_boosting.py:
--------------------------------------------------------------------------------
1 | """Tests for making sure experimental imports work as expected."""
2 |
3 | import textwrap
4 |
5 | from sklearn.utils._testing import assert_run_python_script
6 |
7 |
def test_import_raises_warning():
    """Importing ``enable_hist_gradient_boosting`` must emit a UserWarning."""
    # The snippet is handed to ``assert_run_python_script`` instead of being
    # run inline -- presumably so that the import happens in a fresh
    # interpreter where the module has not been imported yet; confirm
    # against the helper's implementation.
    code = """
    import pytest
    with pytest.warns(UserWarning, match="it is not needed to import"):
        from sklearn.experimental import enable_hist_gradient_boosting  # noqa
    """
    assert_run_python_script(textwrap.dedent(code))
15 |
--------------------------------------------------------------------------------
/sklearn/externals/README:
--------------------------------------------------------------------------------
1 | This directory contains bundled external dependencies that are updated
2 | every once in a while.
3 |
4 | Note for distribution packagers: if you want to remove the duplicated
5 | code and depend on a packaged version, we suggest that you simply create a
6 | symbolic link in this directory.
7 |
8 |
--------------------------------------------------------------------------------
/sklearn/externals/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | External, bundled dependencies.
4 |
5 | """
6 |
--------------------------------------------------------------------------------
/sklearn/externals/_packaging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/externals/_packaging/__init__.py
--------------------------------------------------------------------------------
/sklearn/externals/conftest.py:
--------------------------------------------------------------------------------
1 | # Do not collect any tests in externals. This is more robust than using
2 | # --ignore because --ignore needs a path and it is not convenient to pass in
3 | # the externals path (very long install-dependent path in site-packages) when
4 | # using --pyargs
def pytest_ignore_collect(path, config):
    """Tell pytest to ignore every path it asks about.

    Both hook arguments are unused; the function unconditionally returns
    ``True``.
    """
    return True
7 |
8 |
--------------------------------------------------------------------------------
/sklearn/feature_extraction/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn.feature_extraction` module deals with feature extraction
3 | from raw data. It currently includes methods to extract features from text and
4 | images.
5 | """
6 |
7 | from ._dict_vectorizer import DictVectorizer
8 | from ._hash import FeatureHasher
9 | from .image import img_to_graph, grid_to_graph
10 | from . import text
11 |
12 | __all__ = [
13 | "DictVectorizer",
14 | "image",
15 | "img_to_graph",
16 | "grid_to_graph",
17 | "text",
18 | "FeatureHasher",
19 | ]
20 |
--------------------------------------------------------------------------------
/sklearn/feature_extraction/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import platform
3 |
4 |
def configuration(parent_package="", top_path=None):
    """Build the numpy.distutils configuration of feature_extraction.

    The ``_hashing_fast`` extension is registered on every Python
    implementation except PyPy; the ``tests`` subpackage is always added.
    Returns the ``Configuration`` object.
    """
    import numpy
    from numpy.distutils.misc_util import Configuration

    config = Configuration("feature_extraction", parent_package, top_path)

    # The "m" library is only requested on POSIX systems.
    libraries = ["m"] if os.name == "posix" else []

    running_on_pypy = platform.python_implementation() == "PyPy"
    if not running_on_pypy:
        config.add_extension(
            "_hashing_fast",
            sources=["_hashing_fast.pyx"],
            include_dirs=[numpy.get_include()],
            libraries=libraries,
        )

    config.add_subpackage("tests")
    return config
24 |
--------------------------------------------------------------------------------
/sklearn/feature_extraction/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/feature_extraction/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/feature_selection/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/feature_selection/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/gaussian_process/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Author: Jan Hendrik Metzen
4 | # Vincent Dubourg
5 | # (mostly translation, see implementation details)
6 | # License: BSD 3 clause
7 |
8 | """
9 | The :mod:`sklearn.gaussian_process` module implements Gaussian Process
10 | based regression and classification.
11 | """
12 |
13 | from ._gpr import GaussianProcessRegressor
14 | from ._gpc import GaussianProcessClassifier
15 | from . import kernels
16 |
17 |
18 | __all__ = ["GaussianProcessRegressor", "GaussianProcessClassifier", "kernels"]
19 |
--------------------------------------------------------------------------------
/sklearn/gaussian_process/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/gaussian_process/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/impute/__init__.py:
--------------------------------------------------------------------------------
1 | """Transformers for missing value imputation"""
2 | import typing
3 |
4 | from ._base import MissingIndicator, SimpleImputer
5 | from ._knn import KNNImputer
6 |
7 | if typing.TYPE_CHECKING:
8 | # Avoid errors in type checkers (e.g. mypy) for experimental estimators.
9 | # TODO: remove this check once the estimator is no longer experimental.
10 | from ._iterative import IterativeImputer # noqa
11 |
12 | __all__ = ["MissingIndicator", "SimpleImputer", "KNNImputer"]
13 |
--------------------------------------------------------------------------------
/sklearn/impute/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/impute/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/inspection/__init__.py:
--------------------------------------------------------------------------------
1 | """The :mod:`sklearn.inspection` module includes tools for model inspection."""
2 |
3 |
4 | from ._permutation_importance import permutation_importance
5 |
6 | from ._partial_dependence import partial_dependence
7 | from ._plot.partial_dependence import plot_partial_dependence
8 | from ._plot.partial_dependence import PartialDependenceDisplay
9 |
10 |
11 | __all__ = [
12 | "partial_dependence",
13 | "plot_partial_dependence",
14 | "permutation_importance",
15 | "PartialDependenceDisplay",
16 | ]
17 |
--------------------------------------------------------------------------------
/sklearn/inspection/_plot/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/inspection/_plot/__init__.py
--------------------------------------------------------------------------------
/sklearn/inspection/_plot/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/inspection/_plot/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/inspection/setup.py:
--------------------------------------------------------------------------------
1 | from numpy.distutils.misc_util import Configuration
2 |
3 |
def configuration(parent_package="", top_path=None):
    """Build the numpy.distutils configuration for ``inspection``.

    Registers the ``_plot``, ``_plot.tests`` and ``tests`` subpackages, in
    that order, and returns the configuration object.
    """
    cfg = Configuration("inspection", parent_package, top_path)

    for subpackage in ("_plot", "_plot.tests", "tests"):
        cfg.add_subpackage(subpackage)

    return cfg
13 |
14 |
15 | if __name__ == "__main__":
16 | from numpy.distutils.core import setup
17 |
18 | setup(**configuration().todict())
19 |
--------------------------------------------------------------------------------
/sklearn/inspection/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/inspection/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/linear_model/_glm/__init__.py:
--------------------------------------------------------------------------------
1 | # License: BSD 3 clause
2 |
3 | from .glm import (
4 | GeneralizedLinearRegressor,
5 | PoissonRegressor,
6 | GammaRegressor,
7 | TweedieRegressor,
8 | )
9 |
10 | __all__ = [
11 | "GeneralizedLinearRegressor",
12 | "PoissonRegressor",
13 | "GammaRegressor",
14 | "TweedieRegressor",
15 | ]
16 |
--------------------------------------------------------------------------------
/sklearn/linear_model/_glm/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # License: BSD 3 clause
2 |
--------------------------------------------------------------------------------
/sklearn/linear_model/_sgd_fast.pxd:
--------------------------------------------------------------------------------
1 | # License: BSD 3 clause
2 | """Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx"""
3 |
# Base extension type of the loss hierarchy declared in this file.
# Both methods are C-level (cdef) and marked ``nogil``, i.e. callable without
# holding the GIL.
# NOTE(review): presumably ``loss`` is the loss value and ``dloss`` its
# derivative for prediction ``p`` and target ``y`` — confirm in _sgd_fast.pyx.
cdef class LossFunction:
    cdef double loss(self, double p, double y) nogil
    cdef double dloss(self, double p, double y) nogil
7 |
8 |
# Subclass of LossFunction; re-declares the same two nogil C-level methods.
cdef class Regression(LossFunction):
    cdef double loss(self, double p, double y) nogil
    cdef double dloss(self, double p, double y) nogil
12 |
13 |
# Subclass of LossFunction; re-declares the same two nogil C-level methods.
cdef class Classification(LossFunction):
    cdef double loss(self, double p, double y) nogil
    cdef double dloss(self, double p, double y) nogil
17 |
18 |
# Subclass of Classification; re-declares the same two nogil C-level methods.
# NOTE(review): presumably the logistic (log) loss — confirm in _sgd_fast.pyx.
cdef class Log(Classification):
    cdef double loss(self, double p, double y) nogil
    cdef double dloss(self, double p, double y) nogil
22 |
23 |
# Subclass of Regression; re-declares the same two nogil C-level methods.
# NOTE(review): presumably the squared-error loss — confirm in _sgd_fast.pyx.
cdef class SquaredLoss(Regression):
    cdef double loss(self, double p, double y) nogil
    cdef double dloss(self, double p, double y) nogil
27 |
--------------------------------------------------------------------------------
/sklearn/linear_model/_sgd_fast_helpers.h:
--------------------------------------------------------------------------------
1 | // We cannot directly reuse the npy_isfinite from npy_math.h as numpy
2 | // and scikit-learn are not necessarily built with the same compiler.
3 | // When re-declaring the functions in the template for cython
4 | // specific for each parameter input type, it needs to be 2 different functions
5 | // as cython doesn't support function overloading.
6 | #ifdef _MSC_VER
7 | # include
8 | # define skl_isfinite _finite
9 | # define skl_isfinite32 _finite
10 | # define skl_isfinite64 _finite
11 | #else
12 | # include
13 | # define skl_isfinite npy_isfinite
14 | # define skl_isfinite32 npy_isfinite
15 | # define skl_isfinite64 npy_isfinite
16 | #endif
17 |
--------------------------------------------------------------------------------
/sklearn/linear_model/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/linear_model/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/manifold/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn.manifold` module implements data embedding techniques.
3 | """
4 |
5 | from ._locally_linear import locally_linear_embedding, LocallyLinearEmbedding
6 | from ._isomap import Isomap
7 | from ._mds import MDS, smacof
8 | from ._spectral_embedding import SpectralEmbedding, spectral_embedding
9 | from ._t_sne import TSNE, trustworthiness
10 |
11 | __all__ = [
12 | "locally_linear_embedding",
13 | "LocallyLinearEmbedding",
14 | "Isomap",
15 | "MDS",
16 | "smacof",
17 | "SpectralEmbedding",
18 | "spectral_embedding",
19 | "TSNE",
20 | "trustworthiness",
21 | ]
22 |
--------------------------------------------------------------------------------
/sklearn/manifold/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy
4 |
5 |
def configuration(parent_package="", top_path=None):
    """Build the numpy.distutils configuration for ``manifold``.

    Registers the ``_utils`` and ``_barnes_hut_tsne`` Cython extensions
    (both compiled with ``-O3``) and the ``tests`` subpackage.
    """
    from numpy.distutils.misc_util import Configuration

    cfg = Configuration("manifold", parent_package, top_path)

    # Link against libm on POSIX systems only.
    math_libs = ["m"] if os.name == "posix" else []

    # Both extensions share identical build options; only the name differs.
    for ext_name in ("_utils", "_barnes_hut_tsne"):
        cfg.add_extension(
            ext_name,
            sources=[f"{ext_name}.pyx"],
            include_dirs=[numpy.get_include()],
            libraries=math_libs,
            extra_compile_args=["-O3"],
        )

    cfg.add_subpackage("tests")
    return cfg
34 |
35 |
36 | if __name__ == "__main__":
37 | from numpy.distutils.core import setup
38 |
39 | setup(**configuration().todict())
40 |
--------------------------------------------------------------------------------
/sklearn/manifold/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/manifold/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/metrics/_plot/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/metrics/_plot/__init__.py
--------------------------------------------------------------------------------
/sklearn/metrics/_plot/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/metrics/_plot/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/metrics/cluster/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy
4 | from numpy.distutils.misc_util import Configuration
5 |
6 |
def configuration(parent_package="", top_path=None):
    """Build the numpy.distutils configuration for ``metrics.cluster``.

    Registers the ``_expected_mutual_info_fast`` Cython extension and the
    ``tests`` subpackage, then returns the configuration object.
    """
    cfg = Configuration("cluster", parent_package, top_path)

    # Link against libm on POSIX systems only.
    math_libs = ["m"] if os.name == "posix" else []

    extension_options = {
        "sources": ["_expected_mutual_info_fast.pyx"],
        "include_dirs": [numpy.get_include()],
        "libraries": math_libs,
    }
    cfg.add_extension("_expected_mutual_info_fast", **extension_options)

    cfg.add_subpackage("tests")
    return cfg
22 |
23 |
24 | if __name__ == "__main__":
25 | from numpy.distutils.core import setup
26 |
27 | setup(**configuration().todict())
28 |
--------------------------------------------------------------------------------
/sklearn/metrics/cluster/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/metrics/cluster/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/metrics/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/metrics/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/mixture/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn.mixture` module implements mixture modeling algorithms.
3 | """
4 |
5 | from ._gaussian_mixture import GaussianMixture
6 | from ._bayesian_mixture import BayesianGaussianMixture
7 |
8 |
9 | __all__ = ["GaussianMixture", "BayesianGaussianMixture"]
10 |
--------------------------------------------------------------------------------
/sklearn/mixture/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/mixture/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/mixture/tests/test_mixture.py:
--------------------------------------------------------------------------------
1 | # Author: Guillaume Lemaitre
2 | # License: BSD 3 clause
3 |
4 | import pytest
5 | import numpy as np
6 |
7 | from sklearn.mixture import GaussianMixture
8 | from sklearn.mixture import BayesianGaussianMixture
9 |
10 |
@pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()])
def test_gaussian_mixture_n_iter(estimator):
    """Check that ``n_iter_`` equals the number of iterations performed."""
    expected_n_iter = 1
    X = np.random.RandomState(0).rand(10, 5)

    # With max_iter capped at one, exactly one iteration must be reported.
    estimator.set_params(max_iter=expected_n_iter)
    estimator.fit(X)

    assert estimator.n_iter_ == expected_n_iter
20 |
21 |
@pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()])
def test_mixture_n_components_greater_than_n_samples_error(estimator):
    """Check that fitting raises when n_components > n_samples."""
    rng = np.random.RandomState(0)
    X = rng.rand(10, 5)
    # Ask for more components (12) than there are rows in X (10).
    estimator.set_params(n_components=12)

    msg = "Expected n_samples >= n_components"
    with pytest.raises(ValueError, match=msg):
        estimator.fit(X)
32 |
--------------------------------------------------------------------------------
/sklearn/model_selection/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/model_selection/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/model_selection/tests/common.py:
--------------------------------------------------------------------------------
1 | """
2 | Common utilities for testing model selection.
3 | """
4 |
5 | import numpy as np
6 |
7 | from sklearn.model_selection import KFold
8 |
9 |
class OneTimeSplitter:
    """Single-use cross-validation iterator wrapping ``KFold``.

    The ``KFold`` splits are drawn from one shared iterator created at
    construction time, so they can be consumed only once.
    """

    def __init__(self, n_splits=4, n_samples=99):
        self.n_splits = n_splits
        self.n_samples = n_samples
        kfold = KFold(n_splits=n_splits)
        placeholder_data = np.ones(n_samples)
        self.indices = iter(kfold.split(placeholder_data))

    def split(self, X=None, y=None, groups=None):
        """Yield whatever is left in the shared iterator; arguments are unused."""
        shared_iterator = self.indices
        for fold in shared_iterator:
            yield fold

    def get_n_splits(self, X=None, y=None, groups=None):
        """Return the ``n_splits`` value given at construction."""
        return self.n_splits
25 |
--------------------------------------------------------------------------------
/sklearn/neighbors/_distance_metric.py:
--------------------------------------------------------------------------------
1 | # TODO: Remove this file in 1.3
2 | import warnings
3 |
4 | from ..metrics import DistanceMetric as _DistanceMetric
5 |
6 |
class DistanceMetric(_DistanceMetric):
    """Deprecated import path for ``sklearn.metrics.DistanceMetric``.

    ``get_metric`` emits a ``FutureWarning`` and then delegates to the
    parent class imported from ``..metrics``.
    """

    @classmethod
    def _warn(cls):
        """Emit the ``FutureWarning`` announcing the moved import path."""
        message = (
            "sklearn.neighbors.DistanceMetric has been moved "
            "to sklearn.metrics.DistanceMetric in 1.0. "
            "This import path will be removed in 1.3"
        )
        warnings.warn(message, category=FutureWarning)

    @classmethod
    def get_metric(cls, metric, **kwargs):
        """Warn about the deprecated path, then defer to the parent class."""
        DistanceMetric._warn()
        resolved_metric = _DistanceMetric.get_metric(metric, **kwargs)
        return resolved_metric
21 |
--------------------------------------------------------------------------------
/sklearn/neighbors/_partition_nodes.pxd:
--------------------------------------------------------------------------------
1 | from ..utils._typedefs cimport DTYPE_t, ITYPE_t
2 |
# C-level declaration so other Cython modules can cimport this function.
# ``except -1`` reserves the return value -1 for signalling a Python exception.
# NOTE(review): parameter meanings are not visible here; the names suggest a
# flat data buffer, the index array to partition, the feature to split on, the
# split position, and the data dimensions — confirm in _partition_nodes.pyx.
cdef int partition_node_indices(
        DTYPE_t *data,
        ITYPE_t *node_indices,
        ITYPE_t split_dim,
        ITYPE_t split_index,
        ITYPE_t n_features,
        ITYPE_t n_points) except -1
10 |
--------------------------------------------------------------------------------
/sklearn/neighbors/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
def configuration(parent_package="", top_path=None):
    """Build the numpy.distutils configuration for ``neighbors``.

    Registers the ``_ball_tree``, ``_kd_tree``, ``_partition_nodes`` (built
    as C++) and ``_quad_tree`` Cython extensions plus the ``tests``
    subpackage, then returns the configuration object.
    """
    import numpy
    from numpy.distutils.misc_util import Configuration

    cfg = Configuration("neighbors", parent_package, top_path)

    # Link against libm on POSIX systems only.
    math_libs = ["m"] if os.name == "posix" else []

    # (extension name, extra build options); registration order is preserved.
    extensions = (
        ("_ball_tree", {}),
        ("_kd_tree", {}),
        ("_partition_nodes", {"language": "c++"}),
        ("_quad_tree", {}),
    )
    for ext_name, extra_options in extensions:
        cfg.add_extension(
            ext_name,
            sources=[f"{ext_name}.pyx"],
            include_dirs=[numpy.get_include()],
            libraries=math_libs,
            **extra_options,
        )

    cfg.add_subpackage("tests")
    return cfg
45 |
--------------------------------------------------------------------------------
/sklearn/neighbors/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/neighbors/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/neighbors/tests/test_kd_tree.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | from joblib import Parallel
4 | from sklearn.utils.fixes import delayed
5 |
6 | from sklearn.neighbors._kd_tree import KDTree
7 |
8 | DIMENSION = 3
9 |
10 | METRICS = {"euclidean": {}, "manhattan": {}, "chebyshev": {}, "minkowski": dict(p=3)}
11 |
12 |
def test_array_object_type():
    """KDTree must reject ragged rows stored in an object-dtype array."""
    ragged_rows = [(1, 2, 3), (2, 5), (5, 5, 1, 2)]
    X = np.array(ragged_rows, dtype=object)

    expected_message = "setting an array element with a sequence"
    with pytest.raises(ValueError, match=expected_message):
        KDTree(X)
18 |
19 |
def test_kdtree_picklable_with_joblib():
    """Check that KDTree queries work when joblib memory-maps the data.

    Non-regression test for #21685 and #21228.
    """
    X = np.random.RandomState(0).random_sample((10, 3))
    tree = KDTree(X, leaf_size=2)

    # max_nbytes=1 triggers read-only memory mapping, which used to raise
    # "ValueError: buffer source array is read-only" in a previous version
    # of the Cython code.
    parallel = Parallel(n_jobs=2, max_nbytes=1)
    parallel(delayed(tree.query)(data) for data in 2 * [X])
32 |
--------------------------------------------------------------------------------
/sklearn/neural_network/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn.neural_network` module includes models based on neural
3 | networks.
4 | """
5 |
6 | # License: BSD 3 clause
7 |
8 | from ._rbm import BernoulliRBM
9 |
10 | from ._multilayer_perceptron import MLPClassifier
11 | from ._multilayer_perceptron import MLPRegressor
12 |
13 | __all__ = ["BernoulliRBM", "MLPClassifier", "MLPRegressor"]
14 |
--------------------------------------------------------------------------------
/sklearn/neural_network/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/neural_network/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/neural_network/tests/test_base.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from sklearn.neural_network._base import binary_log_loss
5 | from sklearn.neural_network._base import log_loss
6 |
7 |
def test_binary_log_loss_1_prob_finite():
    """A predicted probability of exactly one must give a finite log loss."""
    y_true = np.array([[0, 0, 1]]).T
    y_prob = np.array([[0.9, 1.0, 1.0]]).T

    assert np.isfinite(binary_log_loss(y_true, y_prob))
15 |
16 |
@pytest.mark.parametrize(
    "y_true, y_prob",
    [
        (
            np.array([[1, 0, 0], [0, 1, 0]]),
            np.array([[0.0, 1.0, 0.0], [0.9, 0.05, 0.05]]),
        ),
        (np.array([[0, 0, 1]]).T, np.array([[0.9, 1.0, 1.0]]).T),
    ],
)
def test_log_loss_1_prob_finite(y_true, y_prob):
    """A predicted probability of exactly 1 must give a finite log loss."""
    computed_loss = log_loss(y_true, y_prob)
    is_finite = np.isfinite(computed_loss)
    assert is_finite
31 |
--------------------------------------------------------------------------------
/sklearn/preprocessing/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
def configuration(parent_package="", top_path=None):
    """Build the numpy.distutils configuration for ``preprocessing``.

    Registers the ``_csr_polynomial_expansion`` Cython extension and the
    ``tests`` subpackage, then returns the configuration object.
    """
    import numpy
    from numpy.distutils.misc_util import Configuration

    cfg = Configuration("preprocessing", parent_package, top_path)

    # Link against libm on POSIX systems only.
    math_libs = ["m"] if os.name == "posix" else []

    extension_name = "_csr_polynomial_expansion"
    cfg.add_extension(
        extension_name,
        sources=[f"{extension_name}.pyx"],
        include_dirs=[numpy.get_include()],
        libraries=math_libs,
    )

    cfg.add_subpackage("tests")
    return cfg
23 |
--------------------------------------------------------------------------------
/sklearn/preprocessing/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/preprocessing/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/semi_supervised/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn.semi_supervised` module implements semi-supervised learning
3 | algorithms. These algorithms utilize small amounts of labeled data and large
4 | amounts of unlabeled data for classification tasks. This module includes Label
5 | Propagation.
6 | """
7 |
8 | from ._label_propagation import LabelPropagation, LabelSpreading
9 | from ._self_training import SelfTrainingClassifier
10 |
11 | __all__ = ["SelfTrainingClassifier", "LabelPropagation", "LabelSpreading"]
12 |
--------------------------------------------------------------------------------
/sklearn/semi_supervised/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/semi_supervised/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/svm/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn.svm` module includes Support Vector Machine algorithms.
3 | """
4 |
5 | # See http://scikit-learn.sourceforge.net/modules/svm.html for complete
6 | # documentation.
7 |
8 | # Author: Fabian Pedregosa with help from
9 | # the scikit-learn community. LibSVM and LibLinear are copyright
10 | # of their respective owners.
11 | # License: BSD 3 clause (C) INRIA 2010
12 |
13 | from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, LinearSVR
14 | from ._bounds import l1_min_c
15 |
16 | __all__ = [
17 | "LinearSVC",
18 | "LinearSVR",
19 | "NuSVC",
20 | "NuSVR",
21 | "OneClassSVM",
22 | "SVC",
23 | "SVR",
24 | "l1_min_c",
25 | ]
26 |
--------------------------------------------------------------------------------
/sklearn/svm/_newrand.pyx:
--------------------------------------------------------------------------------
1 | """Wrapper for newrand.h"""
2 |
3 | cdef extern from "newrand.h":
4 | void set_seed(unsigned int)
5 | unsigned int bounded_rand_int(unsigned int)
6 |
def set_seed_wrap(unsigned int custom_seed):
    """Forward ``custom_seed`` to the C function ``set_seed`` from newrand.h."""
    set_seed(custom_seed)
9 |
def bounded_rand_int_wrap(unsigned int range_):
    """Return the result of the C function ``bounded_rand_int(range_)``.

    NOTE(review): presumably a random integer bounded by ``range_`` — the
    exact interval is defined in newrand.h; confirm there.
    """
    return bounded_rand_int(range_)
12 |
--------------------------------------------------------------------------------
/sklearn/svm/src/liblinear/_cython_blas_helpers.h:
--------------------------------------------------------------------------------
1 | #ifndef _CYTHON_BLAS_HELPERS_H
2 | #define _CYTHON_BLAS_HELPERS_H
3 |
/* Function-pointer types for the four routines bundled in BlasFunctions.
 * NOTE(review): the argument lists resemble the BLAS level-1 routines
 * dot / axpy / scal / nrm2 (n, [alpha,] x, incx [, y, incy]) — confirm
 * against the Cython code that fills the struct. */
typedef double (*dot_func)(int, double*, int, double*, int);
typedef void (*axpy_func)(int, double, double*, int, double*, int);
typedef void (*scal_func)(int, double, double*, int);
typedef double (*nrm2_func)(int, double*, int);

/* Table holding one pointer of each type declared above. */
typedef struct BlasFunctions{
    dot_func dot;
    axpy_func axpy;
    scal_func scal;
    nrm2_func nrm2;
} BlasFunctions;
15 |
16 | #endif
17 |
--------------------------------------------------------------------------------
/sklearn/svm/src/liblinear/tron.h:
--------------------------------------------------------------------------------
1 | #ifndef _TRON_H
2 | #define _TRON_H
3 |
4 | #include "_cython_blas_helpers.h"
5 |
// Abstract interface for the objective object passed to TRON's constructor.
// Every member except the destructor is pure virtual.
class function
{
public:
    // NOTE(review): presumably the objective value at w — confirm in tron.cpp.
    virtual double fun(double *w) = 0 ;
    // NOTE(review): presumably writes the gradient at w into g — confirm.
    virtual void grad(double *w, double *g) = 0 ;
    // NOTE(review): presumably writes a Hessian-vector product for s into Hs — confirm.
    virtual void Hv(double *s, double *Hs) = 0 ;

    // NOTE(review): presumably the number of variables (length of w) — confirm.
    virtual int get_nr_variable(void) = 0 ;
    // Virtual destructor with an empty body.
    virtual ~function(void){}
};
16 |
// Solver class holding a `function` objective, a tolerance, an iteration
// cap, a BlasFunctions table and a print callback.
// NOTE(review): presumably LIBLINEAR's trust-region Newton solver — confirm in tron.cpp.
class TRON
{
public:
    // Defaults: eps = 0.1, max_iter = 1000, blas = 0 (null pointer).
    TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000, BlasFunctions *blas = 0);
    ~TRON();

    // NOTE(review): presumably runs the solver on w in place; the meaning of
    // the int result is not visible in this header.
    int tron(double *w);
    // Accepts a callback taking a C string.
    // NOTE(review): presumably stored in tron_print_string — confirm.
    void set_print_string(void (*i_print) (const char *buf));

private:
    // Internal helpers; their contracts are defined in tron.cpp.
    int trcg(double delta, double *g, double *s, double *r);
    double norm_inf(int n, double *x);

    double eps;
    int max_iter;
    function *fun_obj;
    BlasFunctions *blas;
    // printf-style variadic helper (format string plus arguments).
    void info(const char *fmt,...);
    void (*tron_print_string)(const char *buf);
};
37 | #endif
38 |
--------------------------------------------------------------------------------
/sklearn/svm/src/libsvm/LIBSVM_CHANGES:
--------------------------------------------------------------------------------
1 | Changes to Libsvm
2 |
3 | This is here mainly as checklist for incorporation of new versions of libsvm.
4 |
5 | * Add copyright to files svm.cpp and svm.h
6 | * Add random_seed support and call to srand in fit function
7 | * Improved random number generator (fix on windows, enhancement on other
8 | platforms). See
9 | * invoke scipy blas api for svm kernel function to improve performance with speedup rate of 1.5X to 2X for dense data only. See
10 | * Expose the number of iterations run in optimization. See
11 | The changes made with respect to upstream are detailed in the heading of svm.cpp
12 |
--------------------------------------------------------------------------------
/sklearn/svm/src/libsvm/_svm_cython_blas_helpers.h:
--------------------------------------------------------------------------------
1 | #ifndef _SVM_CYTHON_BLAS_HELPERS_H
2 | #define _SVM_CYTHON_BLAS_HELPERS_H
3 |
/* Function-pointer type for the single routine bundled in BlasFunctions.
 * NOTE(review): the argument list resembles BLAS dot (n, x, incx, y, incy)
 * — confirm against the Cython code that fills the struct. */
typedef double (*dot_func)(int, double*, int, double*, int);
/* Table holding the dot-product function pointer. */
typedef struct BlasFunctions{
    dot_func dot;
} BlasFunctions;
8 |
9 | #endif
10 |
--------------------------------------------------------------------------------
/sklearn/svm/src/libsvm/libsvm_template.cpp:
--------------------------------------------------------------------------------
1 |
2 | /* this is a hack to generate libsvm with both sparse and dense
3 | methods in the same binary*/
4 |
5 | #define _DENSE_REP
6 | #include "svm.cpp"
7 | #undef _DENSE_REP
8 | #include "svm.cpp"
9 |
--------------------------------------------------------------------------------
/sklearn/svm/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/svm/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/tests/test_check_build.py:
--------------------------------------------------------------------------------
1 | """
2 | Smoke Test the check_build module
3 | """
4 |
5 | # Author: G Varoquaux
6 | # License: BSD 3 clause
7 |
8 | import pytest
9 |
10 | from sklearn.__check_build import raise_build_error
11 |
12 |
def test_raise_build_error():
    """``raise_build_error`` must raise ``ImportError`` when handed one."""
    original_error = ImportError()
    with pytest.raises(ImportError):
        raise_build_error(original_error)
16 |
--------------------------------------------------------------------------------
/sklearn/tests/test_init.py:
--------------------------------------------------------------------------------
1 | # Basic unittests to test functioning of module's top-level
2 |
3 |
4 | __author__ = "Yaroslav Halchenko"
5 | __license__ = "BSD"
6 |
7 |
8 | try:
9 | from sklearn import * # noqa
10 |
11 | _top_import_error = None
12 | except Exception as e:
13 | _top_import_error = e
14 |
15 |
def test_import_skl():
    # Check whether the star-import at the top of this module failed.
    # "import *" is discouraged outside of the module level, so the import is
    # attempted there and its outcome recorded in ``_top_import_error``.
    import_failed = _top_import_error is not None
    assert not import_failed
21 |
--------------------------------------------------------------------------------
/sklearn/tree/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn.tree` module includes decision tree-based models for
3 | classification and regression.
4 | """
5 |
6 | from ._classes import BaseDecisionTree
7 | from ._classes import DecisionTreeClassifier
8 | from ._classes import DecisionTreeRegressor
9 | from ._classes import ExtraTreeClassifier
10 | from ._classes import ExtraTreeRegressor
11 | from ._export import export_graphviz, plot_tree, export_text
12 |
13 | __all__ = [
14 | "BaseDecisionTree",
15 | "DecisionTreeClassifier",
16 | "DecisionTreeRegressor",
17 | "ExtraTreeClassifier",
18 | "ExtraTreeRegressor",
19 | "export_graphviz",
20 | "plot_tree",
21 | "export_text",
22 | ]
23 |
--------------------------------------------------------------------------------
/sklearn/tree/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/tree/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/utils/_arpack.py:
--------------------------------------------------------------------------------
1 | from .validation import check_random_state
2 |
3 |
def _init_arpack_v0(size, random_state):
    """Create the starting vector for iteration in ARPACK functions.

    The vector is sampled from the uniform distribution on [-1, 1]. This
    scheme was chosen to be consistent with ARPACK's own initialization, as
    other schemes can lead to convergence issues.

    Parameters
    ----------
    size : int
        Number of entries in the generated vector.

    random_state : int, RandomState instance or None
        Seed or generator for the uniform draw; it is passed through
        ``check_random_state`` before use.

    Returns
    -------
    v0 : ndarray of shape (size,)
        The initialized vector.
    """
    rng = check_random_state(random_state)
    return rng.uniform(-1, 1, size)
31 |
--------------------------------------------------------------------------------
/sklearn/utils/_fast_dict.pxd:
--------------------------------------------------------------------------------
1 | # Author: Gael Varoquaux
2 | # License: BSD
3 | """
4 | Uses C++ map containers for fast dict-like behavior with keys being
5 | integers, and values float.
6 | """
7 |
8 | from libcpp.map cimport map as cpp_map
9 |
10 | # Import the C-level symbols of numpy
11 | cimport numpy as np
12 |
# Type of the values stored in the dict (NumPy float64).
ctypedef np.float64_t DTYPE_t

# Type of the keys stored in the dict (NumPy intp).
ctypedef np.intp_t ITYPE_t

###############################################################################
# An object to be used in Python

cdef class IntFloatDict:
    # Underlying C++ map from ITYPE_t keys to DTYPE_t values.
    cdef cpp_map[ITYPE_t, DTYPE_t] my_map
    # C-level method taking typed memoryviews of keys and values.
    # NOTE(review): judging by the name it presumably copies the map content
    # into the two arrays -- confirm against the .pyx implementation.
    cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values)
23 |
--------------------------------------------------------------------------------
/sklearn/utils/_heap.pxd:
--------------------------------------------------------------------------------
1 | # Heap routines, used in various Cython implementations.
2 |
3 | from cython cimport floating
4 |
5 | from ._typedefs cimport ITYPE_t
6 |
# Declaration only; the body is not part of this .pxd file.
# `floating` is Cython's fused float/double type, so `dist` may point to
# either precision. `nogil` allows calls without holding the GIL.
# NOTE(review): from the names, `dist` and `idx` (each of length `size`) are
# presumably reordered together according to `dist` -- confirm in the
# implementation.
cdef int simultaneous_sort(
    floating* dist,
    ITYPE_t* idx,
    ITYPE_t size
) nogil
12 |
# Declaration only; the body is not part of this .pxd file.
# Takes two parallel buffers (`values`, `indices`) of length `size` plus a
# candidate value `val` and its index `val_idx`. `nogil` allows calls
# without holding the GIL.
# NOTE(review): the heap ordering (min- or max-heap) and the meaning of the
# int return value are not visible here -- check the implementation.
cdef int heap_push(
    floating* values,
    ITYPE_t* indices,
    ITYPE_t size,
    floating val,
    ITYPE_t val_idx,
) nogil
20 |
--------------------------------------------------------------------------------
/sklearn/utils/_joblib.py:
--------------------------------------------------------------------------------
1 | import warnings as _warnings
2 |
# joblib imports may raise DeprecationWarning on certain Python versions, so
# every import below is performed with warnings silenced.
with _warnings.catch_warnings():
    _warnings.simplefilter("ignore")
    import joblib
    from joblib import (
        Memory,
        Parallel,
        __version__,
        cpu_count,
        delayed,
        dump,
        effective_n_jobs,
        hash,
        load,
        logger,
        parallel_backend,
        register_parallel_backend,
    )


# Names re-exported from joblib by this module.
__all__ = [
    "parallel_backend",
    "register_parallel_backend",
    "cpu_count",
    "Parallel",
    "Memory",
    "delayed",
    "effective_n_jobs",
    "hash",
    "logger",
    "dump",
    "load",
    "joblib",
    "__version__",
]
32 |
--------------------------------------------------------------------------------
/sklearn/utils/_logistic_sigmoid.pyx:
--------------------------------------------------------------------------------
1 | from libc.math cimport log, exp
2 |
3 | import numpy as np
4 | cimport numpy as np
5 |
6 | np.import_array()
7 | ctypedef np.float64_t DTYPE_t
8 |
9 |
cdef inline DTYPE_t _inner_log_logistic_sigmoid(const DTYPE_t x):
    """Log of the logistic sigmoid function log(1 / (1 + e ** -x))"""
    # Both branches are algebraically equal to log(1 / (1 + exp(-x))). The
    # split on the sign of x keeps the argument passed to exp() non-positive,
    # so exp() cannot overflow for inputs of large magnitude.
    if x > 0:
        return -log(1. + exp(-x))
    else:
        return x - log(1. + exp(x))
16 |
17 |
def _log_logistic_sigmoid(unsigned int n_samples,
                          unsigned int n_features,
                          DTYPE_t[:, :] X,
                          DTYPE_t[:, :] out):
    """Fill ``out`` with the elementwise log of the logistic sigmoid of ``X``.

    Only the entries ``[i, j]`` with ``i < n_samples`` and ``j < n_features``
    are visited. ``out`` is written in place and is also returned.
    """
    cdef:
        unsigned int i
        unsigned int j

    for i in range(n_samples):
        for j in range(n_features):
            out[i, j] = _inner_log_logistic_sigmoid(X[i, j])
    return out
30 |
--------------------------------------------------------------------------------
/sklearn/utils/_openmp_helpers.pxd:
--------------------------------------------------------------------------------
# Helpers to access OpenMP threads information
#
# These interfaces act as indirections so that implementations written for
# OpenMP can still be used when OpenMP is not supported.

# Declaration only (the body is not part of this .pxd file); `nogil` allows
# calls without holding the GIL.
cdef int _openmp_thread_num() nogil
7 |
--------------------------------------------------------------------------------
/sklearn/utils/_typedefs.pxd:
--------------------------------------------------------------------------------
1 | #!python
2 | cimport numpy as np
3 |
# Floating point/data type
ctypedef np.float64_t DTYPE_t  # WARNING: should match DTYPE in _typedefs.pyx

# NumPy type codes corresponding to the two typedefs of this file.
cdef enum:
    DTYPECODE = np.NPY_FLOAT64
    ITYPECODE = np.NPY_INTP

# Index/integer type.
#  WARNING: ITYPE_t must be a signed integer type or you will have a bad time!
ctypedef np.intp_t ITYPE_t  # WARNING: should match ITYPE in _typedefs.pyx
14 |
--------------------------------------------------------------------------------
/sklearn/utils/_typedefs.pyx:
--------------------------------------------------------------------------------
1 | #!python
2 |
3 | import numpy as np
4 | cimport numpy as np
5 | from libc.math cimport sqrt
6 |
7 | np.import_array()
8 |
9 |
# use a hack to determine the associated numpy data types
# NOTE: the following requires the buffer interface, only available in
# numpy 1.5+. We'll choose the DTYPE by hand instead.
#cdef ITYPE_t idummy
#cdef ITYPE_t[:] idummy_view = <ITYPE_t[:1]> &idummy
#ITYPE = np.asarray(idummy_view).dtype
ITYPE = np.intp  # WARNING: this should match ITYPE_t in _typedefs.pxd

#cdef DTYPE_t ddummy
#cdef DTYPE_t[:] ddummy_view = <DTYPE_t[:1]> &ddummy
#DTYPE = np.asarray(ddummy_view).dtype
DTYPE = np.float64  # WARNING: this should match DTYPE_t in _typedefs.pxd

# some handy constants, stored as C-level DTYPE_t values
cdef DTYPE_t INF = np.inf
cdef DTYPE_t PI = np.pi
cdef DTYPE_t ROOT_2PI = sqrt(2 * PI)
27 |
--------------------------------------------------------------------------------
/sklearn/utils/murmurhash.pxd:
--------------------------------------------------------------------------------
1 | """Export fast murmurhash C/C++ routines + cython wrappers"""
2 |
3 | cimport numpy as np
4 |
5 | # The C API is disabled for now, since it requires -I flags to get
6 | # compilation to work even when these functions are not used.
7 | #cdef extern from "MurmurHash3.h":
8 | # void MurmurHash3_x86_32(void* key, int len, unsigned int seed,
9 | # void* out)
10 | #
11 | # void MurmurHash3_x86_128(void* key, int len, unsigned int seed,
12 | # void* out)
13 | #
14 | # void MurmurHash3_x64_128(void* key, int len, unsigned int seed,
15 | # void* out)
16 |
17 |
# cpdef declarations: each function is callable from both Python and Cython.
# The name encodes the key type (`int` or `bytes`) and the return type
# (`u32` -> np.uint32_t, `s32` -> np.int32_t); every variant takes an
# unsigned int `seed`.
cpdef np.uint32_t murmurhash3_int_u32(int key, unsigned int seed)
cpdef np.int32_t murmurhash3_int_s32(int key, unsigned int seed)
cpdef np.uint32_t murmurhash3_bytes_u32(bytes key, unsigned int seed)
cpdef np.int32_t murmurhash3_bytes_s32(bytes key, unsigned int seed)
22 |
--------------------------------------------------------------------------------
/sklearn/utils/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/utils/tests/__init__.py
--------------------------------------------------------------------------------
/sklearn/utils/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import sklearn
4 |
5 |
@pytest.fixture
def print_changed_only_false():
    """Run the requesting test with ``print_changed_only=False``.

    The option is switched off before the test body runs and set back to
    ``True`` once the fixture resumes after the ``yield``.
    """
    sklearn.set_config(print_changed_only=False)
    yield
    sklearn.set_config(print_changed_only=True)  # reset to default
11 |
--------------------------------------------------------------------------------
/sklearn/utils/tests/test_arpack.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from numpy.testing import assert_allclose
3 |
4 | from sklearn.utils import check_random_state
5 | from sklearn.utils._arpack import _init_arpack_v0
6 |
7 |
@pytest.mark.parametrize("seed", range(100))
def test_init_arpack_v0(seed):
    """The start vector must equal a seeded draw from uniform(-1, 1).

    Checks that the initialization is a sampling from a uniform distribution
    whose random state can be fixed.
    """
    n_elements = 1000
    expected = check_random_state(seed).uniform(-1, 1, size=n_elements)

    actual = _init_arpack_v0(n_elements, seed)

    assert_allclose(actual, expected)
17 |
--------------------------------------------------------------------------------
/sklearn/utils/tests/test_arrayfuncs.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from sklearn.utils._testing import assert_allclose
5 | from sklearn.utils.arrayfuncs import min_pos
6 |
7 |
def test_min_pos():
    """min_pos must be non-negative and agree between float32 and float64."""
    rng = np.random.RandomState(0)
    X_double = rng.randn(100)
    X_single = X_double.astype(np.float32)

    result_double = min_pos(X_double)
    result_single = min_pos(X_single)

    assert_allclose(result_double, result_single)
    assert result_double >= 0
18 |
19 |
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_min_pos_no_positive(dtype):
    """Without any positive entry, min_pos returns the dtype's largest value.

    Regression check for #19328: all input elements are <= 0 here.
    """
    all_negative = np.full(100, -1.0, dtype=dtype)
    largest_representable = np.finfo(dtype).max

    assert min_pos(all_negative) == largest_representable
27 |
--------------------------------------------------------------------------------
/sklearn/utils/tests/test_cython_templating.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import pytest
3 | import sklearn
4 |
5 |
def test_files_generated_by_templates_are_git_ignored():
    """Every file rendered from a ``*.tp`` template must appear in .gitignore."""
    package_dir = pathlib.Path(sklearn.__file__).parent
    repo_root = package_dir.parent

    gitignore_file = repo_root / ".gitignore"
    if not gitignore_file.exists():
        pytest.skip("Tests are not run from the source folder")

    ignored_files = [
        pathlib.Path(entry) for entry in gitignore_file.read_text().split("\n")
    ]

    for template in package_dir.glob("**/*.tp"):
        # Express the path relative to the repository root, then drop the
        # tempita suffix: "path/to/template.p??.tp" -> "path/to/template.p??"
        generated = template.relative_to(repo_root).with_suffix("")
        assert generated in ignored_files
21 |
--------------------------------------------------------------------------------
/sklearn/utils/tests/test_fast_dict.py:
--------------------------------------------------------------------------------
1 | """ Test fast_dict.
2 | """
3 | import numpy as np
4 |
5 | from sklearn.utils._fast_dict import IntFloatDict, argmin
6 |
7 |
def test_int_float_dict():
    """Exercise construction, lookup, ``len`` and ``append`` of IntFloatDict."""
    rng = np.random.RandomState(0)
    keys = np.unique(rng.randint(100, size=10).astype(np.intp))
    n_initial = len(keys)
    values = rng.rand(n_initial)

    mapping = IntFloatDict(keys, values)
    for k, v in zip(keys, values):
        assert mapping[k] == v
    assert len(mapping) == n_initial

    # A single extra entry grows the container by one.
    mapping.append(120, 3.0)
    assert mapping[120] == 3.0
    assert len(mapping) == n_initial + 1

    # Many further insertions must remain retrievable.
    for new_key in range(1000, 3000):
        mapping.append(new_key, 4.0)
    assert mapping[1100] == 4.0
24 |
25 |
def test_int_float_dict_argmin():
    """``argmin`` must return ``(0, 0)`` when keys and values both run 0..99."""
    n_entries = 100
    mapping = IntFloatDict(
        np.arange(n_entries, dtype=np.intp),
        np.arange(n_entries, dtype=np.float64),
    )
    assert argmin(mapping) == (0, 0)
32 |
--------------------------------------------------------------------------------
/sklearn/utils/tests/test_optimize.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from sklearn.utils.optimize import _newton_cg
4 | from scipy.optimize import fmin_ncg
5 |
6 | from sklearn.utils._testing import assert_array_almost_equal
7 |
8 |
def test_newton_cg():
    """Check that ``_newton_cg`` gives the same result as scipy's ``fmin_ncg``.

    The objective is the quadratic ``0.5 * ||A x||^2``. Its gradient is
    ``A.T A x`` and its Hessian is the constant matrix ``A.T A``.
    """
    rng = np.random.RandomState(0)
    A = rng.normal(size=(10, 10))
    x0 = np.ones(10)

    def func(x):
        Ax = A.dot(x)
        return 0.5 * (Ax).dot(Ax)

    def grad(x):
        return A.T.dot(A.dot(x))

    def hess(x, p):
        # Hessian-vector product as expected by ``fhess_p``. The Hessian
        # ``A.T A`` does not depend on ``x``, so the product is ``A.T (A p)``.
        # The former ``x.all()`` expression matched this only while every
        # entry of ``x`` was non-zero.
        return A.T.dot(A.dot(p))

    def grad_hess(x):
        # Return the gradient and a callable computing Hessian-vector products.
        return grad(x), lambda p: A.T.dot(A.dot(p))

    assert_array_almost_equal(
        _newton_cg(grad_hess, func, grad, x0, tol=1e-10)[0],
        fmin_ncg(f=func, x0=x0, fprime=grad, fhess_p=hess),
    )
33 |
--------------------------------------------------------------------------------
/sklearn/utils/tests/test_parallel.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from joblib import Parallel
3 |
4 | from numpy.testing import assert_array_equal
5 |
6 | from sklearn._config import config_context, get_config
7 | from sklearn.utils.fixes import delayed
8 |
9 |
def get_working_memory():
    """Return the ``working_memory`` entry of the current configuration."""
    current_config = get_config()
    return current_config["working_memory"]
12 |
13 |
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("backend", ["loky", "threading", "multiprocessing"])
def test_configuration_passes_through_to_joblib(n_jobs, backend):
    """The global configuration must be passed to joblib jobs."""
    n_tasks = 2

    with config_context(working_memory=123):
        runner = Parallel(n_jobs=n_jobs, backend=backend)
        results = runner(delayed(get_working_memory)() for _ in range(n_tasks))

    assert_array_equal(results, [123] * n_tasks)
25 |
--------------------------------------------------------------------------------
/sklearn/utils/tests/test_show_versions.py:
--------------------------------------------------------------------------------
1 | from sklearn.utils.fixes import threadpool_info
2 | from sklearn.utils._show_versions import _get_sys_info
3 | from sklearn.utils._show_versions import _get_deps_info
4 | from sklearn.utils._show_versions import show_versions
5 | from sklearn.utils._testing import ignore_warnings
6 |
7 |
def test_get_sys_info():
    """``_get_sys_info`` must contain the python, executable and machine keys."""
    sys_info = _get_sys_info()

    for expected_key in ("python", "executable", "machine"):
        assert expected_key in sys_info
15 |
def test_get_deps_info():
    """``_get_deps_info`` must list every expected dependency name."""
    expected_names = (
        "pip",
        "setuptools",
        "sklearn",
        "numpy",
        "scipy",
        "Cython",
        "pandas",
        "matplotlib",
        "joblib",
    )

    with ignore_warnings():
        deps_info = _get_deps_info()

    for name in expected_names:
        assert name in deps_info
30 |
def test_show_versions(capsys):
    """``show_versions`` must print python, numpy and threadpool details."""
    with ignore_warnings():
        show_versions()
        out, _ = capsys.readouterr()

    for expected in ("python", "numpy"):
        assert expected in out

    # The threadpoolctl section is only expected when threadpool_info()
    # returns something truthy.
    if threadpool_info():
        assert "threadpoolctl info:" in out
--------------------------------------------------------------------------------
/sklearn/utils/tests/test_weight_vector.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | from sklearn.utils._weight_vector import (
4 | WeightVector32,
5 | WeightVector64,
6 | )
7 |
8 |
@pytest.mark.parametrize(
    "dtype, WeightVector",
    [
        (np.float32, WeightVector32),
        (np.float64, WeightVector64),
    ],
)
def test_type_invariance(dtype, WeightVector):
    """Check the `dtype` consistency of `WeightVector`.

    The ``w`` and ``aw`` attributes must expose the same dtype as the arrays
    the weight vector was built from.
    """
    # A seeded generator keeps the test reproducible and leaves NumPy's
    # global random state untouched.
    rng = np.random.RandomState(0)
    weights = rng.rand(100).astype(dtype)
    average_weights = rng.rand(100).astype(dtype)

    weight_vector = WeightVector(weights, average_weights)

    # Compare dtypes by value instead of relying on object identity.
    expected_dtype = np.dtype(dtype)
    assert np.asarray(weight_vector.w).dtype == expected_dtype
    assert np.asarray(weight_vector.aw).dtype == expected_dtype
25 |
--------------------------------------------------------------------------------